File size: 3,538 Bytes
639a760
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# CITATION.cff — Citation File Format
# https://citation-file-format.github.io/
# Used by Hugging Face (HF), GitHub, and Zenodo to render a "Cite this repository" UI.

# Schema version and headline metadata: what is being cited and how it is
# described.
cff-version: 1.2.0
message: 'If you use this framework or its derivative artifacts, please cite as below.'
type: software
# Folded scalar with strip chomping: the wrapped lines join with single spaces
# into a one-line title with no trailing newline.
title: >-
  Composer 2.5 Replication Framework: Methodology and Integration
  Architecture for Open Replication of Cursor's Agentic Coding Recipe
# Folded scalar with default (clip) chomping: line breaks fold to spaces and
# exactly one trailing newline is kept.
abstract: >
  An open-source methodology and integration architecture for
  replicating Cursor's Composer 2.5 recipe on a HuggingFace base model,
  plus a novel multi-teacher trace-replay distillation reward channel
  that complements the published SDPO/OPSD method (which Cursor's
  "Targeted RL with Textual Feedback" uses). Pre-experimental v0.0
  release: methodology paper, audited recipe mapping, integration
  architecture across TRL/VeRL/OpenEnv, empirical economic-feasibility
  result for the novel channel ($0.98/trace), and a working code
  skeleton with 38 passing unit tests.

# Sole author, cited by handle. Only keys that carry a real value are listed:
# the CFF 1.2.0 schema requires person fields to be non-empty strings, so an
# empty `given-names: ""` placeholder fails validation. Add `given-names`
# back only when there is an actual name to put in it.
authors:
  - family-names: "Codeseys"
    affiliation: "Independent researcher"
    # Replace with real ORCID if available:
    # orcid: "https://orcid.org/0000-0000-0000-0000"

# Release identity. The version and date are quoted so they always load as
# strings rather than being retyped by the YAML parser.
version: '0.0.0'
date-released: '2026-05-25'
license: 'MIT'
# The landing page and the code location point at the same repository URL.
url: 'https://huggingface.co/Codeseys/composer-replication-framework'
repository-code: 'https://huggingface.co/Codeseys/composer-replication-framework'

# Search/discovery tags. Each entry is explicitly quoted so that none of them
# (for example the dotted 'composer-2.5') can be retyped by a YAML parser.
keywords:
  - 'reinforcement-learning'
  - 'post-training'
  - 'distillation'
  - 'agentic-coding'
  - 'composer-2.5'
  - 'cursor'
  - 'kimi-k2'
  - 'grpo'
  - 'dapo'
  - 'sdpo'
  - 'opsd'
  - 'trl'
  - 'verl'
  - 'openenv'
  - 'llm'
# Primary upstream works this framework depends on / cites
references:
  # Cursor's "Introducing Composer 2.5" post. The author is an organisation,
  # so it is given as an entity (`name`) rather than family/given names.
  # NOTE(review): the URL is under /blog; CFF also defines a `blog` reference
  # type. Confirm whether `article` is the intended type here.
  - type: article
    title: 'Introducing Composer 2.5'
    year: 2026
    url: 'https://cursor.com/blog/composer-2-5'
    authors:
      - name: 'Cursor Team'

  # OPSD paper (arXiv 2601.18734). Authors are short leaf mappings, so they
  # are written in flow style, one per line, in the original order.
  - type: article
    title: 'Self-Distilled Reasoner: On-Policy Self-Distillation for Large Language Models'
    year: 2026
    url: 'https://arxiv.org/abs/2601.18734'
    authors:
      - {family-names: 'Zhao', given-names: 'Siyan'}
      - {family-names: 'Xie', given-names: 'Zhihui'}
      - {family-names: 'Liu', given-names: 'Mengchen'}
      - {family-names: 'Huang', given-names: 'Jing'}
      - {family-names: 'Pang', given-names: 'Guan'}
      - {family-names: 'Chen', given-names: 'Feiyu'}
      - {family-names: 'Grover', given-names: 'Aditya'}
    # Folded scalar with strip chomping: joins into a single line with no
    # trailing newline.
    notes: >-
      OPSD — single-LLM self-distillation; provides the reference loss
      implementation lifted by this framework.

  # SDPO paper (arXiv 2601.20802). Authors are listed as family/given name
  # pairs; citation tools build "Family, G." style output from this split, so
  # compound surnames must be kept whole in `family-names`.
  - type: article
    title: "Reinforcement Learning via Self-Distillation"
    authors:
      - family-names: "Hübotter"
        given-names: "Jonas"
      - family-names: "Lübeck"
        given-names: "Frederike"
      - family-names: "Behric"
        given-names: "Lejs"
      - family-names: "Baumann"
        given-names: "Anton"
      - family-names: "Bagatella"
        given-names: "Marco"
      - family-names: "Marta"
        given-names: "Daniel"
      - family-names: "Hakimi"
        given-names: "Ido"
      - family-names: "Shenfeld"
        given-names: "Idan"
      # Compound surname: "Kleine Buening" is the family name and "Thomas" the
      # given name (previously split as "Buening" / "Thomas Kleine").
      - family-names: "Kleine Buening"
        given-names: "Thomas"
      - family-names: "Guestrin"
        given-names: "Carlos"
      - family-names: "Krause"
        given-names: "Andreas"
    year: 2026
    url: "https://arxiv.org/abs/2601.20802"
    notes: "SDPO — formalizes the same mechanism as Cursor's Targeted RL with Textual Feedback. ICLR 2026 Scaling Post-training Workshop."