Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Exercițiu Practic: Ajustează fin un model cu GRPO","local":"exercițiu-practic-ajustează-fin-un-model-cu-grpo","sections":[{"title":"Instalează dependențele","local":"instalează-dependențele","sections":[],"depth":2},{"title":"Importă și loghează în Weights & Biases","local":"importă-și-loghează-în-weights--biases","sections":[],"depth":2},{"title":"Încarcă setul de date","local":"încarcă-setul-de-date","sections":[],"depth":2},{"title":"Încarcă modelul","local":"încarcă-modelul","sections":[],"depth":2},{"title":"Încarcă LoRA","local":"încarcă-lora","sections":[],"depth":2},{"title":"Definește funcția de recompensă","local":"definește-funcția-de-recompensă","sections":[],"depth":2},{"title":"Definește argumentele de antrenament","local":"definește-argumentele-de-antrenament","sections":[],"depth":2},{"title":"Împinge modelul pe Hub în timpul antrenamentului","local":"împinge-modelul-pe-hub-în-timpul-antrenamentului","sections":[],"depth":2},{"title":"Interpretează rezultatele antrenamentului","local":"interpretează-rezultatele-antrenamentului","sections":[],"depth":2},{"title":"Salvează și publică modelul","local":"salvează-și-publică-modelul","sections":[],"depth":2},{"title":"Generează text","local":"generează-text","sections":[],"depth":2}],"depth":1}"> | |
| <link href="/docs/course/pr_1069/rum/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/entry/start.1de7c3d2.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/scheduler.37c15a92.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/singletons.e13b7dfd.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/index.18351ede.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/paths.e130b7b0.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/entry/app.1f82014c.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/index.2bf4358c.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/nodes/0.3c83e1ab.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/nodes/32.ea884e0f.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/Tip.363c041f.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/CodeBlock.4e987730.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/CourseFloatingBanner.6add7356.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/getInferenceSnippets.24b50994.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Exercițiu Practic: Ajustează fin un model cu GRPO","local":"exercițiu-practic-ajustează-fin-un-model-cu-grpo","sections":[{"title":"Instalează dependențele","local":"instalează-dependențele","sections":[],"depth":2},{"title":"Importă și loghează în Weights & Biases","local":"importă-și-loghează-în-weights--biases","sections":[],"depth":2},{"title":"Încarcă setul de date","local":"încarcă-setul-de-date","sections":[],"depth":2},{"title":"Încarcă modelul","local":"încarcă-modelul","sections":[],"depth":2},{"title":"Încarcă LoRA","local":"încarcă-lora","sections":[],"depth":2},{"title":"Definește funcția de recompensă","local":"definește-funcția-de-recompensă","sections":[],"depth":2},{"title":"Definește argumentele de antrenament","local":"definește-argumentele-de-antrenament","sections":[],"depth":2},{"title":"Împinge modelul pe Hub în timpul antrenamentului","local":"împinge-modelul-pe-hub-în-timpul-antrenamentului","sections":[],"depth":2},{"title":"Interpretează rezultatele antrenamentului","local":"interpretează-rezultatele-antrenamentului","sections":[],"depth":2},{"title":"Salvează și publică modelul","local":"salvează-și-publică-modelul","sections":[],"depth":2},{"title":"Generează text","local":"generează-text","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="flex space-x-1 absolute z-10 right-0 top-0"><a href="https://discuss.huggingface.co/t/chapter-2-questions" target="_blank"><img alt="Ask a Question" class="!m-0" src="https://img.shields.io/badge/Ask%20a%20question-ffcb4c.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgLTEgMTA0IDEwNiI+PGRlZnM+PHN0eWxlPi5jbHMtMXtmaWxsOiMyMzFmMjA7fS5jbHMtMntmaWxsOiNmZmY5YWU7fS5jbHMtM3tmaWxsOiMwMGFlZWY7fS5jbHMtNHtmaWxsOiMwMGE5NGY7fS5jbHMtNXtmaWxsOiNmMTVkMjI7fS5jbHMtNntmaWxsOiNlMzFiMjM7fTwvc3R5bGU+PC9kZWZzPjx0aXRsZT5EaXNjb3Vyc2VfbG9nbzwvdGl0bGU+PGcgaWQ9IkxheWVyXzIiPjxnIGlkPSJMYXllcl8zIj48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik01MS44NywwQzIzLjcxLDAsMCwyMi44MywwLDUxYzAsLjkxLDAsNTIuODEsMCw1Mi44MWw1MS44Ni0uMDVjMjguMTYsMCw1MS0yMy43MSw1MS01MS44N1M4MCwwLDUxLjg3LDBaIi8+PHBhdGggY2xhc3M9ImNscy0yIiBkPSJNNTIuMzcsMTkuNzRBMzEuNjIsMzEuNjIsMCwwLDAsMjQuNTgsNjYuNDFsLTUuNzIsMTguNEwzOS40LDgwLjE3YTMxLjYxLDMxLjYxLDAsMSwwLDEzLTYwLjQzWiIvPjxwYXRoIGNsYXNzPSJjbHMtMyIgZD0iTTc3LjQ1LDMyLjEyYTMxLjYsMzEuNiwwLDAsMS0zOC4wNSw0OEwxOC44Niw4NC44MmwyMC45MS0yLjQ3QTMxLjYsMzEuNiwwLDAsMCw3Ny40NSwzMi4xMloiLz48cGF0aCBjbGFzcz0iY2xzLTQiIGQ9Ik03MS42MywyNi4yOUEzMS42LDMxLjYsMCwwLDEsMzguOCw3OEwxOC44Niw4NC44MiwzOS40LDgwLjE3QTMxLjYsMzEuNiwwLDAsMCw3MS42MywyNi4yOVoiLz48cGF0aCBjbGFzcz0iY2xzLTUiIGQ9Ik0yNi40Nyw2Ny4xMWEzMS42MSwzMS42MSwwLDAsMSw1MS0zNUEzMS42MSwzMS42MSwwLDAsMCwyNC41OCw2Ni40MWwtNS43MiwxOC40WiIvPjxwYXRoIGNsYXNzPSJjbHMtNiIgZD0iTTI0LjU4LDY2LjQxQTMxLjYxLDMxLjYxLDAsMCwxLDcxLjYzLDI2LjI5YTMxLjYxLDMxLjYxLDAsMCwwLTQ5LDM5LjYzbC0zLjc2LDE4LjlaIi8+PC9nPjwvZz48L3N2Zz4="></a> <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/course/en/chapter12/grpo_finetune.ipynb" target="_blank"><img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"></a> </div> <h1 class="relative group"><a id="exercițiu-practic-ajustează-fin-un-model-cu-grpo" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#exercițiu-practic-ajustează-fin-un-model-cu-grpo"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Exercițiu Practic: Ajustează fin un model cu GRPO</span></h1> <p data-svelte-h="svelte-1aw1c7o">Acum că ai văzut teoria, să o punem în practică! În acest exercițiu, vei ajusta fin un model cu GRPO.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1f4sd5g">Acest exercițiu a fost scris de expertul în ajustarea fină LLM <a href="https://huggingface.co/mlabonne" rel="nofollow">@mlabonne</a>.</p></div> <h2 class="relative group"><a id="instalează-dependențele" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#instalează-dependențele"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Instalează dependențele</span></h2> <p data-svelte-h="svelte-1x2tko9">În primul rând, să instalăm dependențele pentru acest exercițiu.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->!pip install -qqq datasets==3.2.0 transformers==4.47.1 trl==0.14.0 peft==0.14.0 accelerate==1.2.1 bitsandbytes==0.45.2 wandb==0.19.7 --progress-bar off | |
| !pip install -qqq flash-attn --no-build-isolation --progress-bar off<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-24z50p">Acum vom importa bibliotecile necesare.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-keyword">from</span> peft <span class="hljs-keyword">import</span> LoraConfig, get_peft_model | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> GRPOConfig, GRPOTrainer<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="importă-și-loghează-în-weights--biases" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#importă-și-loghează-în-weights--biases"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Importă și loghează în Weights & Biases</span></h2> <p data-svelte-h="svelte-o4fd8b">Weights & Biases este un instrument pentru înregistrarea și monitorizarea experimentelor tale. Îl vom folosi pentru a înregistra procesul nostru de ajustare fină.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> wandb | |
| wandb.login()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ynujfx">Poți face acest exercițiu fără să te loghezi în Weights & Biases, dar este recomandat să faci asta pentru a-ți urmări experimentele și pentru a interpreta rezultatele.</p> <h2 class="relative group"><a id="încarcă-setul-de-date" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#încarcă-setul-de-date"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Încarcă setul de date</span></h2> <p data-svelte-h="svelte-1p2crus">Acum, să încărcăm setul de date. În acest caz, vom folosi setul de date <a href="https://huggingface.co/datasets/mlabonne/smoltldr" rel="nofollow"><code>mlabonne/smoltldr</code></a>, care conține o listă de povești scurte.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->dataset = load_dataset(<span class="hljs-string">"mlabonne/smoltldr"</span>) | |
| <span class="hljs-built_in">print</span>(dataset)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="încarcă-modelul" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#încarcă-modelul"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Încarcă modelul</span></h2> <p data-svelte-h="svelte-kmx08r">Acum, să încărcăm modelul.</p> <p data-svelte-h="svelte-yvd150">Pentru acest exercițiu, vom folosi modelul <a href="https://huggingface.co/HuggingFaceTB/SmolLM2-135M" rel="nofollow"><code>SmolLM2-135M</code></a>.</p> <p data-svelte-h="svelte-18do2tn">Acesta este un model mic de 135M parametri care rulează pe hardware limitat. Aceasta face modelul ideal pentru învățare, dar nu este cel mai puternic model de acolo. Dacă ai acces la hardware mai puternic, poți încerca să ajustezi fin un model mai mare precum <a href="https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B" rel="nofollow"><code>SmolLM2-1.7B</code></a>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model_id = <span class="hljs-string">"HuggingFaceTB/SmolLM-135M-Instruct"</span> | |
| model = AutoModelForCausalLM.from_pretrained( | |
| model_id, | |
| torch_dtype=<span class="hljs-string">"auto"</span>, | |
| device_map=<span class="hljs-string">"auto"</span>, | |
| attn_implementation=<span class="hljs-string">"flash_attention_2"</span>, | |
| ) | |
| tokenizer = AutoTokenizer.from_pretrained(model_id)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="încarcă-lora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#încarcă-lora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Încarcă LoRA</span></h2> <p data-svelte-h="svelte-17bndbl">Acum, să încărcăm configurația LoRA. Vom profita de LoRA pentru a reduce numărul de parametri antrenabili, și prin urmare amprenta de memorie de care avem nevoie pentru a ajusta fin modelul.</p> <p data-svelte-h="svelte-8vksb5">Dacă nu ești familiar cu LoRA, poți citi mai multe despre aceasta în <a href="https://huggingface.co/learn/course/en/chapter11/3" rel="nofollow">Capitolul 11</a>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Încarcă LoRA</span> | |
| lora_config = LoraConfig( | |
| task_type=<span class="hljs-string">"CAUSAL_LM"</span>, | |
| r=<span class="hljs-number">16</span>, | |
| lora_alpha=<span class="hljs-number">32</span>, | |
| target_modules=<span class="hljs-string">"all-linear"</span>, | |
| ) | |
| model = get_peft_model(model, lora_config) | |
| <span class="hljs-built_in">print</span>(model.print_trainable_parameters())<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->Parametri antrenabili totali: 135M<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="definește-funcția-de-recompensă" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#definește-funcția-de-recompensă"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Definește funcția de recompensă</span></h2> <p data-svelte-h="svelte-113ebbn">După cum s-a menționat în secțiunea anterioară, GRPO poate folosi orice funcție de recompensă pentru a îmbunătăți modelul. În acest caz, vom folosi o funcție de recompensă simplă care încurajează modelul să genereze text lung de 50 de token-uri.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Funcția de recompensă</span> | |
| ideal_length = <span class="hljs-number">50</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">reward_len</span>(<span class="hljs-params">completions, **kwargs</span>): | |
| <span class="hljs-keyword">return</span> [-<span class="hljs-built_in">abs</span>(ideal_length - <span class="hljs-built_in">len</span>(completion)) <span class="hljs-keyword">for</span> completion <span class="hljs-keyword">in</span> completions]<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="definește-argumentele-de-antrenament" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#definește-argumentele-de-antrenament"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Definește argumentele de antrenament</span></h2> <p data-svelte-h="svelte-1117cc3">Acum, să definim argumentele de antrenament. Vom folosi clasa <code>GRPOConfig</code> pentru a defini argumentele de antrenament într-un stil tipic <code>transformers</code>.</p> <p data-svelte-h="svelte-mnw10c">Dacă aceasta este prima dată când definești argumente de antrenament, poți verifica clasa <a href="https://huggingface.co/docs/transformers/en/main_classes/trainer#trainingarguments" rel="nofollow">TrainingArguments</a> pentru mai multe informații, sau <a href="https://huggingface.co/learn/course/en/chapter2/1" rel="nofollow">Capitolul 2</a> pentru o introducere detaliată.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Argumentele de antrenament</span> | |
| training_args = GRPOConfig( | |
| output_dir=<span class="hljs-string">"GRPO"</span>, | |
| learning_rate=<span class="hljs-number">2e-5</span>, | |
| per_device_train_batch_size=<span class="hljs-number">8</span>, | |
| gradient_accumulation_steps=<span class="hljs-number">2</span>, | |
| max_prompt_length=<span class="hljs-number">512</span>, | |
| max_completion_length=<span class="hljs-number">96</span>, | |
| num_generations=<span class="hljs-number">8</span>, | |
| optim=<span class="hljs-string">"adamw_8bit"</span>, | |
| num_train_epochs=<span class="hljs-number">1</span>, | |
| bf16=<span class="hljs-literal">True</span>, | |
| report_to=[<span class="hljs-string">"wandb"</span>], | |
| remove_unused_columns=<span class="hljs-literal">False</span>, | |
| logging_steps=<span class="hljs-number">1</span>, | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-gq0d21">Acum, putem inițializa antrenorul cu modelul, setul de date și argumentele de antrenament și să începem antrenamentul.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Antrenorul</span> | |
| trainer = GRPOTrainer( | |
| model=model, | |
| reward_funcs=[reward_len], | |
| args=training_args, | |
| train_dataset=dataset[<span class="hljs-string">"train"</span>], | |
| ) | |
| <span class="hljs-comment"># Antrenează modelul</span> | |
| wandb.init(project=<span class="hljs-string">"GRPO"</span>) | |
| trainer.train()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1h7lapz">Antrenamentul durează aproximativ 1 oră pe un singur GPU A10G care este disponibil pe Google Colab sau prin Hugging Face Spaces.</p> <h2 class="relative group"><a id="împinge-modelul-pe-hub-în-timpul-antrenamentului" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#împinge-modelul-pe-hub-în-timpul-antrenamentului"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Împinge modelul pe Hub în timpul antrenamentului</span></h2> <p data-svelte-h="svelte-1mlx5ju">Dacă setăm argumentul <code>push_to_hub</code> la <code>True</code> și argumentul <code>model_id</code> la un nume de model valid, modelul va fi împins pe Hugging Face Hub în timp ce antrenăm. Aceasta este utilă dacă vrei să începi să testezi modelul imediat!</p> <h2 class="relative group"><a id="interpretează-rezultatele-antrenamentului" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#interpretează-rezultatele-antrenamentului"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Interpretează rezultatele antrenamentului</span></h2> <p data-svelte-h="svelte-1iil1t3"><code>GRPOTrainer</code> înregistrează recompensa din funcția ta de recompensă, pierderea, și o gamă de alte metrici.</p> <p data-svelte-h="svelte-10ijb1">Ne vom concentra pe recompensa din funcția de recompensă și pierderea.</p> <p data-svelte-h="svelte-1fptnm">După cum poți vedea, recompensa din funcția de recompensă se apropie de 0 pe măsură ce modelul învață. Acesta este un semn bun că modelul învață să genereze text de lungimea corectă.</p> <p data-svelte-h="svelte-14wmbhu"><img src="https://huggingface.co/reasoning-course/images/resolve/main/grpo/13.png" alt="Rezultatul oferit din funcția de recompensă"></p> <p data-svelte-h="svelte-18orwc4">Ai putea observa că pierderea începe de la zero și apoi crește în timpul antrenamentului, ceea ce poate părea contraontuitiv. Acest comportament este așteptat în GRPO și este direct legat de formularea matematică a algoritmului. Pierderea în GRPO este proporțională cu divergența KL (capsula relativă la politica originală). Pe măsură ce antrenamentul progresează, modelul învață să genereze text care se potrivește mai bine cu funcția de recompensă, făcându-l să devieze mai mult de la politica sa inițială. Această divergență crescândă se reflectă în valoarea pierderii în creștere, care de fapt indică că modelul se adaptează cu succes pentru a optimiza funcția de recompensă.</p> <p data-svelte-h="svelte-16bt8zt"><img src="https://huggingface.co/reasoning-course/images/resolve/main/grpo/14.png" alt="Pierderea"></p> <h2 class="relative group"><a id="salvează-și-publică-modelul" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#salvează-și-publică-modelul"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Salvează și publică modelul</span></h2> <p data-svelte-h="svelte-1330z3z">Să partajăm modelul cu comunitatea!</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->merged_model = trainer.model.merge_and_unload() | |
| merged_model.push_to_hub( | |
| <span class="hljs-string">"SmolGRPO-135M"</span>, private=<span class="hljs-literal">False</span>, tags=[<span class="hljs-string">"GRPO"</span>, <span class="hljs-string">"Reasoning-Course"</span>] | |
| )<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="generează-text" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#generează-text"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Generează text</span></h2> <p data-svelte-h="svelte-1wmaqq6">🎉 Ai ajustat fin cu succes un model cu GRPO! Acum, să generăm puțin text cu modelul.</p> <p data-svelte-h="svelte-xb98yg">În primul rând, vom defini un document foarte lung!</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt = <span class="hljs-string">""" | |
| # Un document lung despre pisică | |
| Pisica (Felis catus), numită și pisica domestică sau pisica de casă, este un mamifer carnivor | |
| mic domesticit. Este singura specie domesticită din familia Felidae. Progresele în arheologie | |
| și genetică au arătat că domesticirea pisicii a avut loc în Orientul Apropiat în jurul anului | |
| 7500 î.Hr. Este ținută în mod obișnuit ca animal de companie și pisică de fermă, dar se află | |
| și în libertate ca pisică sălbatică evitând contactul uman. Este prețuită de oameni pentru | |
| companie și capacitatea sa de a ucide dăunători. Ghearele sale retractabile sunt adaptate | |
| pentru uciderea speciilor de pradă mici precum șoarecii și șobolanii. Are un corp puternic | |
| și flexibil, reflexe rapide și dinți ascuțiți, iar vederea nocturnă și simțul mirosului sunt | |
| bine dezvoltate. Este o specie socială, dar un vânător solitar și un prădător crepuscular. | |
| Comunicarea pisicii include vocalizări—incluzând mieunat, tors, triluri, șuierat, mormăit | |
| și grunțit—precum și limbajul corpului. Poate auzi sunete prea slabe sau prea înalte în | |
| frecvență pentru urechile umane, cum ar fi cele făcute de mamiferele mici. Secretă și | |
| percep feromoni. | |
| """</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-2c53sd">messages = [ | |
| {“role”: “user”, “content”: prompt}, | |
| ]</p> <p data-svelte-h="svelte-1fjrdkk">Acum, putem genera text cu modelul.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Generează text</span> | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| generator = pipeline(<span class="hljs-string">"text-generation"</span>, model=<span class="hljs-string">"SmolGRPO-135M"</span>) | |
| <span class="hljs-comment">## Sau folosește modelul și tokenizer-ul pe care le-am definit mai devreme</span> | |
| <span class="hljs-comment"># generator = pipeline("text-generation", model=model, tokenizer=tokenizer)</span> | |
| generate_kwargs = { | |
| <span class="hljs-string">"max_new_tokens"</span>: <span class="hljs-number">256</span>, | |
| <span class="hljs-string">"do_sample"</span>: <span class="hljs-literal">True</span>, | |
| <span class="hljs-string">"temperature"</span>: <span class="hljs-number">0.5</span>, | |
| <span class="hljs-string">"min_p"</span>: <span class="hljs-number">0.1</span>, | |
| } | |
| generated_text = generator(messages, generate_kwargs=generate_kwargs) | |
| <span class="hljs-built_in">print</span>(generated_text)<!-- HTML_TAG_END --></pre></div> <h1 class="relative group"><a id="concluzie" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#concluzie"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Concluzie</span></h1> <p data-svelte-h="svelte-xz9hfe">În acest capitol, am văzut cum să ajustăm fin un model cu GRPO. Am văzut de asemenea cum să interpretăm rezultatele antrenamentului și să generăm text cu modelul.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/course/blob/main/chapters/rum/chapter12/5.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span data-svelte-h="svelte-x0xyl0">></span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_1ftlxhy = { | |
| assets: "/docs/course/pr_1069/rum", | |
| base: "/docs/course/pr_1069/rum", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/course/pr_1069/rum/_app/immutable/entry/start.1de7c3d2.js"), | |
| import("/docs/course/pr_1069/rum/_app/immutable/entry/app.1f82014c.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 32], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 51.7 kB
- Xet hash:
- d6c885aa3e19662341ba937be75b7506fd6dcc5cc444182496a6ee7ec9b0b69a
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.