Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Antrenarea de la zero a unui model de limbaj cauzal","local":"training-a-causal-language-model-from-scratch","sections":[{"title":"Colectarea datelor","local":"gathering-the-data","sections":[],"depth":2},{"title":"Pregătirea datasetului","local":"preparing-the-dataset","sections":[],"depth":2},{"title":"Inițializarea unui nou model","local":"initializing-a-new-model","sections":[],"depth":2},{"title":"Generarea codului cu un pipeline","local":"code-generation-with-a-pipeline","sections":[],"depth":2},{"title":"Antrenarea cu 🤗 Accelerate","local":"training-with-accelerate","sections":[],"depth":2}],"depth":1}"> | |
| <link href="/docs/course/pr_1069/rum/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/entry/start.1de7c3d2.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/scheduler.37c15a92.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/singletons.e13b7dfd.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/index.18351ede.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/paths.e130b7b0.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/entry/app.1f82014c.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/index.2bf4358c.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/nodes/0.3c83e1ab.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/nodes/81.47548d06.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/Tip.363c041f.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/Youtube.1e50a667.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/CodeBlock.4e987730.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/CourseFloatingBanner.6add7356.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/FrameworkSwitchCourse.8d4d4ab6.js"> | |
| <link rel="modulepreload" href="/docs/course/pr_1069/rum/_app/immutable/chunks/getInferenceSnippets.24b50994.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Antrenarea de la zero a unui model de limbaj cauzal","local":"training-a-causal-language-model-from-scratch","sections":[{"title":"Colectarea datelor","local":"gathering-the-data","sections":[],"depth":2},{"title":"Pregătirea datasetului","local":"preparing-the-dataset","sections":[],"depth":2},{"title":"Inițializarea unui nou model","local":"initializing-a-new-model","sections":[],"depth":2},{"title":"Generarea codului cu un pipeline","local":"code-generation-with-a-pipeline","sections":[],"depth":2},{"title":"Antrenarea cu 🤗 Accelerate","local":"training-with-accelerate","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="bg-white leading-none border border-gray-100 rounded-lg flex p-0.5 w-56 text-sm mb-4"><a class="flex justify-center flex-1 py-1.5 px-2.5 focus:outline-none !no-underline rounded-l bg-red-50 dark:bg-transparent text-red-600" href="?fw=pt"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><defs><clipPath id="a"><rect x="3.05" y="0.5" width="25.73" height="31" fill="none"></rect></clipPath></defs><g clip-path="url(#a)"><path d="M24.94,9.51a12.81,12.81,0,0,1,0,18.16,12.68,12.68,0,0,1-18,0,12.81,12.81,0,0,1,0-18.16l9-9V5l-.84.83-6,6a9.58,9.58,0,1,0,13.55,0ZM20.44,9a1.68,1.68,0,1,1,1.67-1.67A1.68,1.68,0,0,1,20.44,9Z" fill="#ee4c2c"></path></g></svg> Pytorch </a><a class="flex justify-center flex-1 py-1.5 px-2.5 focus:outline-none !no-underline rounded-r text-gray-500 filter grayscale" href="?fw=tf"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="0.94em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 274"><path d="M145.726 42.065v42.07l72.861 42.07v-42.07l-72.86-42.07zM0 84.135v42.07l36.43 21.03V105.17L0 84.135zm109.291 21.035l-36.43 21.034v126.2l36.43 21.035v-84.135l36.435 21.035v-42.07l-36.435-21.034V105.17z" fill="#E55B2D"></path><path d="M145.726 42.065L36.43 105.17v42.065l72.861-42.065v42.065l36.435-21.03v-84.14zM255.022 63.1l-36.435 21.035v42.07l36.435-21.035V63.1zm-72.865 84.135l-36.43 21.035v42.07l36.43-21.036v-42.07zm-36.43 63.104l-36.436-21.035v84.135l36.435-21.035V210.34z" fill="#ED8E24"></path><path d="M145.726 0L0 84.135l36.43 21.035l109.296-63.105l72.861 42.07L255.022 63.1L145.726 0zm0 126.204l-36.435 21.03l36.435 21.036l36.43-21.035l-36.43-21.03z" fill="#F8BF3C"></path></svg> TensorFlow </a></div> <h1 class="relative group"><a id="training-a-causal-language-model-from-scratch" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-a-causal-language-model-from-scratch"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Antrenarea de la zero a unui model de limbaj cauzal</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0"><a href="https://discuss.huggingface.co/t/chapter-7-questions" target="_blank"><img alt="Ask a Question" class="!m-0" src="https://img.shields.io/badge/Ask%20a%20question-ffcb4c.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgLTEgMTA0IDEwNiI+PGRlZnM+PHN0eWxlPi5jbHMtMXtmaWxsOiMyMzFmMjA7fS5jbHMtMntmaWxsOiNmZmY5YWU7fS5jbHMtM3tmaWxsOiMwMGFlZWY7fS5jbHMtNHtmaWxsOiMwMGE5NGY7fS5jbHMtNXtmaWxsOiNmMTVkMjI7fS5jbHMtNntmaWxsOiNlMzFiMjM7fTwvc3R5bGU+PC9kZWZzPjx0aXRsZT5EaXNjb3Vyc2VfbG9nbzwvdGl0bGU+PGcgaWQ9IkxheWVyXzIiPjxnIGlkPSJMYXllcl8zIj48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik01MS44NywwQzIzLjcxLDAsMCwyMi44MywwLDUxYzAsLjkxLDAsNTIuODEsMCw1Mi44MWw1MS44Ni0uMDVjMjguMTYsMCw1MS0yMy43MSw1MS01MS44N1M4MCwwLDUxLjg3LDBaIi8+PHBhdGggY2xhc3M9ImNscy0yIiBkPSJNNTIuMzcsMTkuNzRBMzEuNjIsMzEuNjIsMCwwLDAsMjQuNTgsNjYuNDFsLTUuNzIsMTguNEwzOS40LDgwLjE3YTMxLjYxLDMxLjYxLDAsMSwwLDEzLTYwLjQzWiIvPjxwYXRoIGNsYXNzPSJjbHMtMyIgZD0iTTc3LjQ1LDMyLjEyYTMxLjYsMzEuNiwwLDAsMS0zOC4wNSw0OEwxOC44Niw4NC44MmwyMC45MS0yLjQ3QTMxLjYsMzEuNiwwLDAsMCw3Ny40NSwzMi4xMloiLz48cGF0aCBjbGFzcz0iY2xzLTQiIGQ9Ik03MS42MywyNi4yOUEzMS42LDMxLjYsMCwwLDEsMzguOCw3OEwxOC44Niw4NC44MiwzOS40LDgwLjE3QTMxLjYsMzEuNiwwLDAsMCw3MS42MywyNi4yOVoiLz48cGF0aCBjbGFzcz0iY2xzLTUiIGQ9Ik0yNi40Nyw2Ny4xMWEzMS42MSwzMS42MSwwLDAsMSw1MS0zNUEzMS42MSwzMS42MSwwLDAsMCwyNC41OCw2Ni40MWwtNS43MiwxOC40WiIvPjxwYXRoIGNsYXNzPSJjbHMtNiIgZD0iTTI0LjU4LDY2LjQxQTMxLjYxLDMxLjYxLDAsMCwxLDcxLjYzLDI2LjI5YTMxLjYxLDMxLjYxLDAsMCwwLTQ5LDM5LjYzbC0zLjc2LDE4LjlaIi8+PC9nPjwvZz48L3N2Zz4="></a> <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/en/chapter7/section6_pt.ipynb" target="_blank"><img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"></a> <a href="https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/en/chapter7/section6_pt.ipynb" target="_blank"><img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"></a></div> <p data-svelte-h="svelte-b4m2r4">Până acum, am folosit în principal modele preantrenate și le-am făcut fine-tuning pentru noi cazuri de utilizare prin reutilizarea weighturilor din preantrenare. După cum am văzut în <a href="/course/chapter1">Capitolul 1</a>, acest lucru este denumit în mod obișnuit <em>învățare prin transfer</em> și este o strategie foarte reușită pentru aplicarea modelelor Transformer la majoritatea cazurilor de utilizare din lumea reală în care datele etichetate sunt puține. În acest capitol, vom adopta o abordare diferită și vom antrena un model complet nou de la zero. Aceasta este o abordare bună dacă aveți multe date și este foarte diferită de datele de preantrenare utilizate pentru modelele disponibile. Cu toate acestea, preantrenarea unui model lingvistic necesită, de asemenea, mult mai multe resurse de calcul decât fine-tuningul unui model existent. Printre exemplele în care poate fi utilă antrenarea unui nou model se numără dataseturile formate din note muzicale, secvențe moleculare precum ADN sau limbaje de programare. Acestea din urmă au câștigat recent teren datorită unor instrumente precum TabNine și Copilot de la GitHub, alimentate de modelul Codex al OpenAI, care pot genera secvențe lungi de cod. Această sarcină de generare a textului este cel mai bine abordată cu modele de limbaj autoregresive sau cauzale, cum ar fi GPT-2.</p> <p data-svelte-h="svelte-x4ejk4">În această secțiune vom construi o versiune la scară redusă a unui model de generare a codului: ne vom concentra pe completări de o linie în loc de funcții sau clase complete, folosind un subset de cod Python. Atunci când lucrați cu date în Python, sunteți în contact frecvent Python data science stack, formată din bibliotecile <code>matplotlib</code>, <code>seaborn</code>, <code>pandas</code> și <code>scikit-learn</code>. Atunci când se utilizează aceste cadre, este frecvent să fie nevoie să se caute comenzi specifice, astfel încât ar fi bine dacă am putea utiliza un model care să efectueze aceste apeluri pentru noi.</p> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/Vpjb1lu0MDk" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-1m8la55">În <a href="/course/chapter6">Capitolul 6</a> am creat un tokenizer eficient pentru a procesa codul sursă Python, dar avem nevoie de un dataset la scară largă pe care să preantrenăm un model. Aici, vom aplica tokenizerul nostru la un corpus de cod Python derivat din repositoriile GitHub. Vom utiliza apoi API-ul <code>Trainer</code> și 🤗 Accelerate pentru a antrena modelul. Să trecem la treabă!</p> <iframe src="https://course-demos-codeparrot-ds.hf.space" frameborder="0" height="300" title="Gradio app" class="block dark:hidden container p-0 flex-grow space-iframe" allow="accelerometer; ambient-light-sensor; autoplay; battery; camera; document-domain; encrypted-media; fullscreen; geolocation; gyroscope; layout-animations; legacy-image-formats; magnetometer; microphone; midi; oversized-images; payment; picture-in-picture; publickey-credentials-get; sync-xhr; usb; vr ; wake-lock; xr-spatial-tracking" sandbox="allow-forms allow-modals allow-popups allow-popups-to-escape-sandbox allow-same-origin allow-scripts allow-downloads"></iframe> <p data-svelte-h="svelte-1uw1ifa">Aceasta este de fapt o prezentare a modelului care a fost antrenat și încărcat în Hub folosind codul prezentat în această secțiune. Îl puteți găsi <a href="https://huggingface.co/huggingface-course/codeparrot-ds?text=plt.imshow%28" rel="nofollow">aici</a>. Rețineți că, deoarece are loc o anumită randomizare în generarea textului, veți obține probabil un rezultat ușor diferit.</p> <h2 class="relative group"><a id="gathering-the-data" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#gathering-the-data"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Colectarea datelor</span></h2> <p data-svelte-h="svelte-1lmk8zm">Codul Python este disponibil din abundență în repositorii de cod, cum ar fi GitHub, pe care le putem utiliza pentru a crea un dataset prin scraping pentru fiecare repositoriu Python. Aceasta a fost abordarea adoptată în <a href="https://learning.oreilly.com/library/view/natural-language-processing/9781098136789/" rel="nofollow">Transformers textbook</a> pentru a preantrena un model GPT-2. Folosind o descărcare GitHub de aproximativ 180 GB care conține aproximativ 20 de milioane de fișiere Python numită <code>codeparrot</code>, autorii au construit un dataset pe care l-au oferit apoi pe <a href="https://huggingface.co/datasets/transformersbook/codeparrot" rel="nofollow">Hugging Face Hub</a>.</p> <p data-svelte-h="svelte-ocfyhb">Cu toate acestea, antrenarea pe întregul corpus consumă timp și puterea calculatorului, iar noi avem nevoie doar de subsetul datasetului referitor la Python data science stack. Așadar, să începem prin filtrarea datasetului <code>codeparrot</code> pentru toate fișierele care includ oricare dintre bibliotecile din aceast stack. Din cauza dimensiunii datasetului, dorim să evităm descărcarea acestuia; în schimb, vom utiliza funcția de streaming pentru a-l filtra din mers. Pentru a ne ajuta să filtrăm exemplele de cod care utilizează bibliotecile pe care le-am menționat mai devreme, vom utiliza următoarea funcție:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">any_keyword_in_string</span>(<span class="hljs-params">string, keywords</span>): | |
| <span class="hljs-keyword">for</span> keyword <span class="hljs-keyword">in</span> keywords: | |
| <span class="hljs-keyword">if</span> keyword <span class="hljs-keyword">in</span> string: | |
| <span class="hljs-keyword">return</span> <span class="hljs-literal">True</span> | |
| <span class="hljs-keyword">return</span> <span class="hljs-literal">False</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-w81hdx">Să-l testăm pe două exemple:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->filters = [<span class="hljs-string">"pandas"</span>, <span class="hljs-string">"sklearn"</span>, <span class="hljs-string">"matplotlib"</span>, <span class="hljs-string">"seaborn"</span>] | |
| example_1 = <span class="hljs-string">"import numpy as np"</span> | |
| example_2 = <span class="hljs-string">"import pandas as pd"</span> | |
| <span class="hljs-built_in">print</span>( | |
| any_keyword_in_string(example_1, filters), any_keyword_in_string(example_2, filters) | |
| )<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-literal">False</span> <span class="hljs-literal">True</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-186rpvt">Putem folosi acest lucru pentru a crea o funcție care va transmite în flux datasetul și va filtra elementele dorite:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> collections <span class="hljs-keyword">import</span> defaultdict | |
| <span class="hljs-keyword">from</span> tqdm <span class="hljs-keyword">import</span> tqdm | |
| <span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">filter_streaming_dataset</span>(<span class="hljs-params">dataset, filters</span>): | |
| filtered_dict = defaultdict(<span class="hljs-built_in">list</span>) | |
| total = <span class="hljs-number">0</span> | |
| <span class="hljs-keyword">for</span> sample <span class="hljs-keyword">in</span> tqdm(<span class="hljs-built_in">iter</span>(dataset)): | |
| total += <span class="hljs-number">1</span> | |
| <span class="hljs-keyword">if</span> any_keyword_in_string(sample[<span class="hljs-string">"content"</span>], filters): | |
| <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> sample.items(): | |
| filtered_dict[k].append(v) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"<span class="hljs-subst">{<span class="hljs-built_in">len</span>(filtered_dict[<span class="hljs-string">'content'</span>])/total:<span class="hljs-number">.2</span>%}</span> of data after filtering."</span>) | |
| <span class="hljs-keyword">return</span> Dataset.from_dict(filtered_dict)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1hcvr7a">Apoi, putem aplica pur și simplu această funcție pe datasetului din flux:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># This cell will take a very long time to execute, so you should skip it and go to</span> | |
| <span class="hljs-comment"># the next one!</span> | |
| <span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| split = <span class="hljs-string">"train"</span> <span class="hljs-comment"># "valid"</span> | |
| filters = [<span class="hljs-string">"pandas"</span>, <span class="hljs-string">"sklearn"</span>, <span class="hljs-string">"matplotlib"</span>, <span class="hljs-string">"seaborn"</span>] | |
| data = load_dataset(<span class="hljs-string">f"transformersbook/codeparrot-<span class="hljs-subst">{split}</span>"</span>, split=split, streaming=<span class="hljs-literal">True</span>) | |
| filtered_data = filter_streaming_dataset(data, filters)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-number">3.26</span>% of data after filtering.<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-13so8r4">Acest lucru ne lasă cu aproximativ 3% din setul de date original, care este încă destul de mare - datasetul rezultat este de 6 GB și constă din 600.000 de scripturi Python!</p> <p data-svelte-h="svelte-lexd28">Filtrarea datasetului complet poate dura 2-3 ore, în funcție de calculator și de bandwidth. Dacă nu doriți să parcurgeți singur acest proces îndelungat, vă punem la dispoziție datasetul filtrat pe Hub pentru a-l descărca:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset, DatasetDict | |
| ds_train = load_dataset(<span class="hljs-string">"huggingface-course/codeparrot-ds-train"</span>, split=<span class="hljs-string">"train"</span>) | |
| ds_valid = load_dataset(<span class="hljs-string">"huggingface-course/codeparrot-ds-valid"</span>, split=<span class="hljs-string">"validation"</span>) | |
| raw_datasets = DatasetDict( | |
| { | |
| <span class="hljs-string">"train"</span>: ds_train, <span class="hljs-comment"># .shuffle().select(range(50000)),</span> | |
| <span class="hljs-string">"valid"</span>: ds_valid, <span class="hljs-comment"># .shuffle().select(range(500))</span> | |
| } | |
| ) | |
| raw_datasets<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->DatasetDict({ | |
| train: Dataset({ | |
| features: [<span class="hljs-string">'repo_name'</span>, <span class="hljs-string">'path'</span>, <span class="hljs-string">'copies'</span>, <span class="hljs-string">'size'</span>, <span class="hljs-string">'content'</span>, <span class="hljs-string">'license'</span>], | |
| num_rows: <span class="hljs-number">606720</span> | |
| }) | |
| valid: Dataset({ | |
| features: [<span class="hljs-string">'repo_name'</span>, <span class="hljs-string">'path'</span>, <span class="hljs-string">'copies'</span>, <span class="hljs-string">'size'</span>, <span class="hljs-string">'content'</span>, <span class="hljs-string">'license'</span>], | |
| num_rows: <span class="hljs-number">3322</span> | |
| }) | |
| })<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-p87iwd">Preantrenarea modelului de limbaj va dura ceva timp. Vă sugerăm să rulați mai întâi bucla de antrenare pe un sample de date prin decomentarea celor două linii parțiale de mai sus și să vă asigurați că antrenarea se finalizează cu succes și că modelele sunt stocate. Nimic nu este mai frustrant decât o rulare de antrenare care eșuează la ultimul pas pentru că ați uitat să creați un folder sau pentru că există o greșeală de tipar la sfârșitul buclei de antrenare!</p></div> <p data-svelte-h="svelte-2gb85e">Să ne uităm la un exemplu din dataset. Vom arăta doar primele 200 de caractere din fiecare câmp:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">for</span> key <span class="hljs-keyword">in</span> raw_datasets[<span class="hljs-string">"train"</span>][<span class="hljs-number">0</span>]: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"<span class="hljs-subst">{key.upper()}</span>: <span class="hljs-subst">{raw_datasets[<span class="hljs-string">'train'</span>][<span class="hljs-number">0</span>][key][:<span class="hljs-number">200</span>]}</span>"</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">'REPO_NAME: kmike/scikit-learn'</span> | |
| <span class="hljs-string">'PATH: sklearn/utils/__init__.py'</span> | |
| <span class="hljs-string">'COPIES: 3'</span> | |
| <span class="hljs-string">'SIZE: 10094'</span> | |
| <span class="hljs-string">'''CONTENT: """ | |
| The :mod:`sklearn.utils` module includes various utilites. | |
| """ | |
| from collections import Sequence | |
| import numpy as np | |
| from scipy.sparse import issparse | |
| import warnings | |
| from .murmurhash import murm | |
| LICENSE: bsd-3-clause'''</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-6il8on">Putem vedea căci câmpul <code>content</code> conține codul pe care dorim ca modelul nostru să se antreneze. Acum că avem un dataset, trebuie să pregătim textele astfel încât acestea să fie într-un format adecvat pentru preantrenare.</p> <h2 class="relative group"><a id="preparing-the-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#preparing-the-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Pregătirea datasetului</span></h2> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/ma1TrR7gE7I" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-11rzd2n">Primul pas va fi tokenizarea datelor, astfel încât să le putem utiliza pentru antrenare. Deoarece obiectivul nostru este de a autocompleta în principal apeluri scurte de funcții, putem păstra dimensiunea contextului relativ mică. Acest lucru are avantajul că putem antrena modelul mult mai rapid și că necesită semnificativ mai puțină memorie. Dacă este important pentru aplicația voastră să aveți mai mult context (de exemplu, dacă doriți ca modelul să scrie teste unitare pe baza unui fișier cu definiția funcției), asigurați-vă că măriți acest număr, dar rețineți, de asemenea, că acest lucru vine cu utilizare mai mare de memorie GPU. Pentru moment, să fixăm dimensiunea contextului la 128 de tokeni, spre deosebire de 1 024 sau 2 048 utilizate în GPT-2 sau respectiv GPT-3.</p> <p data-svelte-h="svelte-zqsxog">Majoritatea documentelor conțin mult mai mult de 128 de cuvinte, astfel încât trunchierea simplă a inputurilor la lungimea maximă ar elimina o mare parte din datasetul nostru. În schimb, vom utiliza opțiunea <code>return_overflowing_tokens</code> pentru a tokeniza întreagul input și a o împărți în mai multe bucăți, așa cum am făcut în <a href="/course/chapter6/4">Capitolul 6</a>. De asemenea, vom utiliza opțiunea <code>return_length</code> pentru a returna automat lungimea fiecărui fragment creat. Adesea, ultimul fragment va fi mai mic decât dimensiunea contextului, iar noi vom scăpa de aceste bucăți pentru a evita problemele de padding; nu avem nevoie de ele, deoarece oricum avem o mulțime de date.</p> <div class="flex justify-center" data-svelte-h="svelte-ad79t9"><img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter7/chunking_texts.svg" alt="Fragmentarea unui text mare în mai multe bucăți."> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter7/chunking_texts-dark.svg" alt="Fragmentarea unui text mare în mai multe bucăți."></div> <p data-svelte-h="svelte-os0dae">Să vedem exact cum funcționează acest lucru analizând primele două exemple:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| context_length = <span class="hljs-number">128</span> | |
| tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"huggingface-course/code-search-net-tokenizer"</span>) | |
| outputs = tokenizer( | |
| raw_datasets[<span class="hljs-string">"train"</span>][:<span class="hljs-number">2</span>][<span class="hljs-string">"content"</span>], | |
| truncation=<span class="hljs-literal">True</span>, | |
| max_length=context_length, | |
| return_overflowing_tokens=<span class="hljs-literal">True</span>, | |
| return_length=<span class="hljs-literal">True</span>, | |
| ) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Input IDs length: <span class="hljs-subst">{<span class="hljs-built_in">len</span>(outputs[<span class="hljs-string">'input_ids'</span>])}</span>"</span>) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Input chunk lengths: <span class="hljs-subst">{(outputs[<span class="hljs-string">'length'</span>])}</span>"</span>) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Chunk mapping: <span class="hljs-subst">{outputs[<span class="hljs-string">'overflow_to_sample_mapping'</span>]}</span>"</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->Input IDs length: <span class="hljs-number">34</span> | |
| Input chunk lengths: [<span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">117</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">128</span>, <span class="hljs-number">41</span>] | |
| Chunk mapping: [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1b8waz5">Putem vedea că obținem 34 de segmente în total din aceste două exemple. Uitându-ne la lungimea segmentelor, putem vedea că segmentele de la sfârșitul ambelor documente au mai puțin de 128 de token-uri (117 și, respectiv, 41). Acestea reprezintă doar o mică parte din totalul segmentelor pe care le avem, așa că le putem șterge în siguranță. Cu ajutorul câmpului <code>overflow_to_sample_mapping</code>, putem, de asemenea, să reconstituim care segmente au aparținut căror probe de intrare.</p> <p data-svelte-h="svelte-16vzg8w">Cu această operațiune folosim o caracteristică utilă a funcției <code>Dataset.map()</code> din 🤗 Datasets, și anume că nu necesită one-to-one maps; așa cum am văzut în <a href="/course/chaptero7/3">secțiunea 3</a>, putem crea batch-uri cu mai multe sau mai puține elemente decât batch-ul de intrare. Acest lucru este util atunci când efectuăm operațiuni precum augmentarea sau filtrarea datelor care modifică numărul de elemente. În cazul nostru, atunci când tokenizăm fiecare element în segmente de dimensiunea contextului specificat, creăm multe probe din fiecare document. Trebuie doar să ne asigurăm că ștergem coloanele existente, deoarece acestea au o dimensiune conflictuală. Dacă am dori să le păstrăm, am putea să le repetăm în mod corespunzător și să le returnăm în cadrul apelului <code>Dataset.map()</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">tokenize</span>(<span class="hljs-params">element</span>): | |
| outputs = tokenizer( | |
| element[<span class="hljs-string">"content"</span>], | |
| truncation=<span class="hljs-literal">True</span>, | |
| max_length=context_length, | |
| return_overflowing_tokens=<span class="hljs-literal">True</span>, | |
| return_length=<span class="hljs-literal">True</span>, | |
| ) | |
| input_batch = [] | |
| <span class="hljs-keyword">for</span> length, input_ids <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(outputs[<span class="hljs-string">"length"</span>], outputs[<span class="hljs-string">"input_ids"</span>]): | |
| <span class="hljs-keyword">if</span> length == context_length: | |
| input_batch.append(input_ids) | |
| <span class="hljs-keyword">return</span> {<span class="hljs-string">"input_ids"</span>: input_batch} | |
| tokenized_datasets = raw_datasets.<span class="hljs-built_in">map</span>( | |
| tokenize, batched=<span class="hljs-literal">True</span>, remove_columns=raw_datasets[<span class="hljs-string">"train"</span>].column_names | |
| ) | |
| tokenized_datasets<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->DatasetDict({ | |
| train: Dataset({ | |
| features: [<span class="hljs-string">'input_ids'</span>], | |
| num_rows: <span class="hljs-number">16702061</span> | |
| }) | |
| valid: Dataset({ | |
| features: [<span class="hljs-string">'input_ids'</span>], | |
| num_rows: <span class="hljs-number">93164</span> | |
| }) | |
| })<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1b7i97r">Avem acum 16,7 milioane de exemple cu 128 de tokenii fiecare, ceea ce corespunde unui total de aproximativ 2,1 miliarde de tokeni. Ca referință, modelele GPT-3 și Codex ale OpenAI sunt antrenate pe 300 și, respectiv, 100 de miliarde de tokeni, unde modelele Codex sunt inițializate din checkpointurile GPT-3. Scopul nostru în această secțiune nu este de a concura cu aceste modele, care pot genera texte lungi și coerente, ci de a crea o versiune la scară redusă care să ofere o funcție rapidă de autocompletare pentru data scientists.</p> <p data-svelte-h="svelte-1jnxa23">Acum că avem datasetul gata, hai să configurăm modelul!</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-13vype2">✏️ <strong>Încercați!</strong> Eliminarea tuturor bucăților care sunt mai mici decât dimensiunea contextului nu a fost o problemă majoră aici, deoarece folosim ferestre de context mici. Pe măsură ce creșteți dimensiunea contextului (sau dacă aveți un corpus de documente scurte), fracțiunea de segmente care sunt aruncate va crește și ea. O modalitate mai eficientă de a pregăti datele este de a uni toate sampleurile tokenizate într-un batch cu un token <code>eos_token_id</code> între ele, iar apoi de a efectua chunkingul pe secvențele concatenate. Ca exercițiu, modificați funcția <code>tokenize()</code> pentru a utiliza această abordare. Rețineți că veți dori să setați <code>truncation=False</code> și să eliminați celelalte argumente din tokenizer pentru a obține secvența completă de token IDs.</p></div> <h2 class="relative group"><a id="initializing-a-new-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#initializing-a-new-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inițializarea unui nou model</span></h2> <p data-svelte-h="svelte-1bci551">Primul nostru pas este să inițializăm un model GPT-2. Vom utiliza aceeași configurație pentru modelul nostru ca și pentru modelul GPT-2 mic, deci încărcăm configurația preantrenată, ne asigurăm că dimensiunea tokenizerlui corespunde cu dimensiunea vocabularului modelului și transmitem ID-urile tokenilor <code>bos</code> și <code>eos</code> (începutul și sfârșitul secvenței):</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, GPT2LMHeadModel, AutoConfig | |
| config = AutoConfig.from_pretrained( | |
| <span class="hljs-string">"gpt2"</span>, | |
| vocab_size=<span class="hljs-built_in">len</span>(tokenizer), | |
| n_ctx=context_length, | |
| bos_token_id=tokenizer.bos_token_id, | |
| eos_token_id=tokenizer.eos_token_id, | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-z2ph3y">Cu această configurație, putem încărca un nou model. Rețineți că aceasta este prima dată când nu folosim funcția <code>from_pretrained()</code>, deoarece inițializăm noi înșine un model:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model = GPT2LMHeadModel(config) | |
| model_size = <span class="hljs-built_in">sum</span>(t.numel() <span class="hljs-keyword">for</span> t <span class="hljs-keyword">in</span> model.parameters()) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"GPT-2 size: <span class="hljs-subst">{model_size/<span class="hljs-number">1000</span>**<span class="hljs-number">2</span>:<span class="hljs-number">.1</span>f}</span>M parameters"</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->GPT-<span class="hljs-number">2</span> size: <span class="hljs-number">124.2</span>M parameters<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-kl78gw">Modelul nostru are 124 milioane de parametri pe care va trebui să le facem tune. Înainte de a începe antrenarea, trebuie să configurăm un data collator care se va ocupa de crearea batch-urilor. Putem utiliza colatorul <code>DataCollatorForLanguageModeling</code>, care este conceput special pentru modelarea limbajului (după cum sugerează subtil numele). Pe lângă stacking și paddingul batchurilor, acesta se ocupă și de crearea labelurilor modelului lingvistic - în modelarea cauzală a limbajului, inputurile servesc și ca labels (doar că sunt decalate cu un element), iar acest data collator le creează din mers în timpul antrenării, astfel încât să nu fie nevoie să duplicăm <code>input_ids</code>.</p> <p data-svelte-h="svelte-1ibjgzj">Rețineți că <code>DataCollatorForLanguageModeling</code> acceptă atât masked language masking (MLM), cât și causal language modeling(CLM). În mod implicit, acesta pregătește datele pentru MLM, dar putem trece la CLM prin setarea argumentului <code>mlm=False</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DataCollatorForLanguageModeling | |
| tokenizer.pad_token = tokenizer.eos_token | |
| data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=<span class="hljs-literal">False</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ed1ynj">Să aruncăm o privire la un exemplu:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->out = data_collator([tokenized_datasets[<span class="hljs-string">"train"</span>][i] <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">5</span>)]) | |
| <span class="hljs-keyword">for</span> key <span class="hljs-keyword">in</span> out: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"<span class="hljs-subst">{key}</span> shape: <span class="hljs-subst">{out[key].shape}</span>"</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->input_ids shape: torch.Size([<span class="hljs-number">5</span>, <span class="hljs-number">128</span>]) | |
| attention_mask shape: torch.Size([<span class="hljs-number">5</span>, <span class="hljs-number">128</span>]) | |
| labels shape: torch.Size([<span class="hljs-number">5</span>, <span class="hljs-number">128</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-18xxsbw">Putem vedea că exemplele sunt stacked și că toți tensorii au aceeași formă.</p> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-19d3tgd">⚠️ Schimbarea inputurilor și a labelurilor pentru a le alinia are loc în interiorul modelului, astfel încât data collatorului doar copiază inputurile pentru a crea labeluri.</p></div> <p data-svelte-h="svelte-qmeovb">Acum avem totul pregătit pentru a ne antrena modelul - până la urmă nu a fost atât de greu! Înainte de a începe antrenamentul, trebuie să ne conectăm la Hugging Face. Dacă lucrați într-un notebook, puteți face acest lucru cu următoarea funcție de utilitate:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> notebook_login | |
| notebook_login()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1dujvgk">Aceasta va afișa un widget în care puteți introduce datele voastre de autentificare Hugging Face.</p> <p data-svelte-h="svelte-fhjhmt">Dacă nu lucrați într-un notebook, tastați următoarea linie în terminal:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->huggingface-cli login<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-eu8mmx">Tot ce a mai rămas de făcut este să configurăm argumentele de antrenare și să pornim <code>Trainer</code>-ul. Vom utiliza un cosine learning rate schedule cu un warmup și o dimensiune efectivă a batch-ului de 256 (<code>per_device_train_batch_size</code> * <code>gradient_accumulation_steps</code>). Acumularea gradientului este utilizată atunci când un singur batch nu încape în memorie și construiește treptat gradientul prin mai multe treceri înainte/înapoi. Vom vedea acest lucru în acțiune atunci când vom crea bucla de antrenare cu 🤗 Accelerate.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> Trainer, TrainingArguments | |
| args = TrainingArguments( | |
| output_dir=<span class="hljs-string">"codeparrot-ds"</span>, | |
| per_device_train_batch_size=<span class="hljs-number">32</span>, | |
| per_device_eval_batch_size=<span class="hljs-number">32</span>, | |
| evaluation_strategy=<span class="hljs-string">"steps"</span>, | |
| eval_steps=<span class="hljs-number">5_000</span>, | |
| logging_steps=<span class="hljs-number">5_000</span>, | |
| gradient_accumulation_steps=<span class="hljs-number">8</span>, | |
| num_train_epochs=<span class="hljs-number">1</span>, | |
| weight_decay=<span class="hljs-number">0.1</span>, | |
| warmup_steps=<span class="hljs-number">1_000</span>, | |
| lr_scheduler_type=<span class="hljs-string">"cosine"</span>, | |
| learning_rate=<span class="hljs-number">5e-4</span>, | |
| save_steps=<span class="hljs-number">5_000</span>, | |
| fp16=<span class="hljs-literal">True</span>, | |
| push_to_hub=<span class="hljs-literal">True</span>, | |
| ) | |
| trainer = Trainer( | |
| model=model, | |
| tokenizer=tokenizer, | |
| args=args, | |
| data_collator=data_collator, | |
| train_dataset=tokenized_datasets[<span class="hljs-string">"train"</span>], | |
| eval_dataset=tokenized_datasets[<span class="hljs-string">"valid"</span>], | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-12i47r4">Acum putem doar să pornim <code>Trainer</code>-ul și să așteptăm ca antrenamentul să se termine. În funcție de executarea antrenării pe întregul set de antrenarea sau pe un subset al acestuia, va dura 20 minute, sau respectiv 2 ore, așa că luați câteva cafeluțe și o carte bună de citit!</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->trainer.train()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-xsga2g">După finalizarea antrenării, putem trimite modelul și tokenizerul către Hub:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->trainer.push_to_hub()<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-dc1g4b">✏️ <strong>Try it out!</strong> Ne-a luat doar aproximativ 30 de linii de cod în plus față de <code>TrainingArguments</code> pentru a ajunge de la texte brute la antrenarea GPT-2. Încercați antrenarea cu propriul dataset și vedeți dacă puteți obține rezultate bune!</p></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-8jgqio">💡 Dacă aveți acces la un calculator cu mai multe GPU-uri, încercați să rulați codul acolo. <code>Trainer</code> gestionează automat mai multe calculatoare, iar acest lucru poate accelera foarte mult antrenamentul.</p></div> <h2 class="relative group"><a id="code-generation-with-a-pipeline" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#code-generation-with-a-pipeline"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Generarea codului cu un pipeline</span></h2> <p data-svelte-h="svelte-1yv8wjm">Acum este momentul adevărului: să vedem cât de bine funcționează de fapt modelul antrenat! Putem vedea în loguri că pierderea a scăzut în mod constant, dar pentru a testa modelul, hai să vedem cât de bine funcționează la câteva încerări. Pentru a face acest lucru, vom încorpora modelul într-un <code>pipeline</code> de generare a textului și îl vom pune pe un GPU pentru generații rapide, dacă există unul disponibil:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| device = torch.device(<span class="hljs-string">"cuda"</span>) <span class="hljs-keyword">if</span> torch.cuda.is_available() <span class="hljs-keyword">else</span> torch.device(<span class="hljs-string">"cpu"</span>) | |
| pipe = pipeline( | |
| <span class="hljs-string">"text-generation"</span>, model=<span class="hljs-string">"huggingface-course/codeparrot-ds"</span>, device=device | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-11n9xm4">Să începem cu sarcina simplă de a crea un scatter plot:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->txt = <span class="hljs-string">"""\ | |
| # create some data | |
| x = np.random.randn(100) | |
| y = np.random.randn(100) | |
| # crearea unui scatter plot cu x, y | |
| """</span> | |
| <span class="hljs-built_in">print</span>(pipe(txt, num_return_sequences=<span class="hljs-number">1</span>)[<span class="hljs-number">0</span>][<span class="hljs-string">"generated_text"</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># crearea unor date</span> | |
| x = np.random.randn(<span class="hljs-number">100</span>) | |
| y = np.random.randn(<span class="hljs-number">100</span>) | |
| <span class="hljs-comment"># crearea scatter plot cu x, y</span> | |
| plt.scatter(x, y) | |
| <span class="hljs-comment"># crearea scatter</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-314mkf">Rezultatul pare corect. Funcționează și pentru o operație <code>pandas</code>? Să vedem dacă putem crea un <code>DataFrame</code> din două array-uri:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->txt = <span class="hljs-string">"""\ | |
| # create some data | |
| x = np.random.randn(100) | |
| y = np.random.randn(100) | |
| # creați dataframeul din x și y | |
| """</span> | |
| <span class="hljs-built_in">print</span>(pipe(txt, num_return_sequences=<span class="hljs-number">1</span>)[<span class="hljs-number">0</span>][<span class="hljs-string">"generated_text"</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># create some data</span> | |
| x = np.random.randn(<span class="hljs-number">100</span>) | |
| y = np.random.randn(<span class="hljs-number">100</span>) | |
| <span class="hljs-comment"># crează un dataframe din x și y</span> | |
| df = pd.DataFrame({<span class="hljs-string">'x'</span>: x, <span class="hljs-string">'y'</span>: y}) | |
| df.insert(<span class="hljs-number">0</span>,<span class="hljs-string">'x'</span>, x) | |
| <span class="hljs-keyword">for</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-f39pdp">Excelent, acesta este răspunsul corect - deși apoi introduce din nou coloana <code>x</code>. Deoarece numărul de tokeni generate este limitat, următoarea buclă <code>for</code> este întreruptă. Să vedem dacă putem face ceva un pic mai complex și dacă modelul ne ajută să folosim operația <code>groupby</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->txt = <span class="hljs-string">"""\ | |
| # dataframe with profession, income and name | |
| df = pd.DataFrame({'profession': x, 'income':y, 'name': z}) | |
| # calculați venitul mediu pe profesie | |
| """</span> | |
| <span class="hljs-built_in">print</span>(pipe(txt, num_return_sequences=<span class="hljs-number">1</span>)[<span class="hljs-number">0</span>][<span class="hljs-string">"generated_text"</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># dataframe with profession, income and name</span> | |
| df = pd.DataFrame({<span class="hljs-string">'profession'</span>: x, <span class="hljs-string">'income'</span>:y, <span class="hljs-string">'name'</span>: z}) | |
| <span class="hljs-comment"># calculeză venitul mediu pe profesie</span> | |
| profession = df.groupby([<span class="hljs-string">'profession'</span>]).mean() | |
| <span class="hljs-comment"># calculează</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1k2a5gy">Nu este rău; acesta este modul corect de a face acest lucru. În cele din urmă, să vedem dacă îl putem folosi și pentru <code>scikit-learn</code> și să configurăm un model Random Forest:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->txt = <span class="hljs-string">""" | |
| # import random forest regressor from scikit-learn | |
| from sklearn.ensemble import RandomForestRegressor | |
| # ajustați modelul Random Forest cu 300 de estimatori pe X, y: | |
| """</span> | |
| <span class="hljs-built_in">print</span>(pipe(txt, num_return_sequences=<span class="hljs-number">1</span>)[<span class="hljs-number">0</span>][<span class="hljs-string">"generated_text"</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># import random forest regressor from scikit-learn</span> | |
| <span class="hljs-keyword">from</span> sklearn.ensemble <span class="hljs-keyword">import</span> RandomForestRegressor | |
| <span class="hljs-comment"># ajustați modelul Random Forest cu 300 de estimatori pe X, y:</span> | |
| rf = RandomForestRegressor(n_estimators=<span class="hljs-number">300</span>, random_state=random_state, max_depth=<span class="hljs-number">3</span>) | |
| rf.fit(X, y) | |
| rf<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-nvy9st">Privind aceste câteva exemple, se pare că modelul a învățat o parte din sintaxa Python data science stack(desigur, ar trebui să o evaluăm mai bine înainte de a implementa modelul în lumea reală). Cu toate acestea, uneori este nevoie de o mai mare personalizare a antrenării modelului pentru a obține performanța necesară pentru un anumit caz de utilizare. De exemplu, dacă am dori să actualizăm dinamic dimensiunea batch-ului sau să avem o buclă de antrenare condiționată care trece peste exemplele proaste din mers? O opțiune ar fi să facem subclass la <code>Trainer</code> și să adăugăm modificările necesare, dar uneori este mai simplu să scriem bucla de antrenare de la zero. Aici intervine 🤗 Accelerate.</p> <h2 class="relative group"><a id="training-with-accelerate" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-with-accelerate"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Antrenarea cu 🤗 Accelerate</span></h2> <p data-svelte-h="svelte-1deq73f">Am văzut cum să antrenăm un model cu <code>Trainer</code>, care poate permite o anumită personalizare. Cu toate acestea, uneori dorim control deplin asupra buclei de antrenare sau dorim să facem unele schimbări exotice. În acest caz, 🤗 Accelerate este o alegere excelentă, iar în această secțiune vom parcurge pașii de utilizare a acestuia pentru a ne antrena modelul. Pentru a face lucrurile mai interesante, vom adăuga și un twist buclei de antrenare.</p> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/Hm8_PgVTFuc" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-10sbmjw">Deoarece suntem interesați în principal de o autocompletare sensibilă pentru bibliotecile din domeniul data science, este logic să acordăm mai multă importanță exemplelor de antrenare care utilizează mai mult aceste biblioteci. Putem identifica cu ușurință aceste exemple prin utilizarea unor cuvinte-cheie precum <code>plt</code>, <code>pd</code>, <code>sk</code>, <code>fit</code> și <code>predict</code>, care sunt cele mai frecvente nume de import pentru <code>matplotlib.pyplot</code>, <code>pandas</code> și <code>sklearn</code>, precum și modelul fit/predict al acestora din urmă. Dacă acestea sunt reprezentate fiecare ca un singur simbol, putem verifica cu ușurință dacă apar în secvența de input. Tokenii pot avea un prefix de spațiu, deci vom verifica și aceste versiuni în vocabularul tokenizerului. Pentru a verifica dacă funcționează, vom adăuga un token de test care ar trebui să fie împărțit în mai mulți tokeni:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->keytoken_ids = [] | |
| <span class="hljs-keyword">for</span> keyword <span class="hljs-keyword">in</span> [ | |
| <span class="hljs-string">"plt"</span>, | |
| <span class="hljs-string">"pd"</span>, | |
| <span class="hljs-string">"sk"</span>, | |
| <span class="hljs-string">"fit"</span>, | |
| <span class="hljs-string">"predict"</span>, | |
| <span class="hljs-string">" plt"</span>, | |
| <span class="hljs-string">" pd"</span>, | |
| <span class="hljs-string">" sk"</span>, | |
| <span class="hljs-string">" fit"</span>, | |
| <span class="hljs-string">" predict"</span>, | |
| <span class="hljs-string">"testtest"</span>, | |
| ]: | |
| ids = tokenizer([keyword]).input_ids[<span class="hljs-number">0</span>] | |
| <span class="hljs-keyword">if</span> <span class="hljs-built_in">len</span>(ids) == <span class="hljs-number">1</span>: | |
| keytoken_ids.append(ids[<span class="hljs-number">0</span>]) | |
| <span class="hljs-keyword">else</span>: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"Keyword has not single token: <span class="hljs-subst">{keyword}</span>"</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">'Keyword has not single token: testtest'</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ciqxzg">Grozav, se pare că funcționează bine! Acum putem scrie o funcție de pierdere personalizată care ia ca secvența de input, logurile și tokenii cheie pe care tocmai le-am selectat. În primul rând, trebuie să aliniem logurile și inputurile: secvența de intrare deplasată cu o unitate la dreapta formează labeluri, deoarece următorul tokenul este labelul pentru tokenul curent. Putem realiza acest lucru începând cu labelurile de la al doilea token al secvenței de intrare, deoarece modelul nu face o predicție pentru primul token în orice caz. Apoi tăiem ultimul logit, deoarece nu avem un label pentru tokenul care urmează secvenței complete de intrare. Astfel, putem calcula pierderea per sample și putem număra aparițiile tuturor cuvintelor-cheie în fiecare sample. În cele din urmă, calculăm media weighturilor pe fiecare sample folosind aparițiile ca weighturi. Deoarece nu dorim să eliminăm toate sampleurile care nu au cuvinte-cheie, adăugăm 1 la weighturi:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> torch.nn <span class="hljs-keyword">import</span> CrossEntropyLoss | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">keytoken_weighted_loss</span>(<span class="hljs-params">inputs, logits, keytoken_ids, alpha=<span class="hljs-number">1.0</span></span>): | |
| <span class="hljs-comment"># Shift so that tokens < n predict n</span> | |
| shift_labels = inputs[..., <span class="hljs-number">1</span>:].contiguous() | |
| shift_logits = logits[..., :-<span class="hljs-number">1</span>, :].contiguous() | |
| <span class="hljs-comment"># Calculate per-token loss</span> | |
| loss_fct = CrossEntropyLoss(reduce=<span class="hljs-literal">False</span>) | |
| loss = loss_fct(shift_logits.view(-<span class="hljs-number">1</span>, shift_logits.size(-<span class="hljs-number">1</span>)), shift_labels.view(-<span class="hljs-number">1</span>)) | |
| <span class="hljs-comment"># Resize and average loss per sample</span> | |
| loss_per_sample = loss.view(shift_logits.size(<span class="hljs-number">0</span>), shift_logits.size(<span class="hljs-number">1</span>)).mean(axis=<span class="hljs-number">1</span>) | |
| <span class="hljs-comment"># Calculate and scale weighting</span> | |
| weights = torch.stack([(inputs == kt).<span class="hljs-built_in">float</span>() <span class="hljs-keyword">for</span> kt <span class="hljs-keyword">in</span> keytoken_ids]).<span class="hljs-built_in">sum</span>( | |
| axis=[<span class="hljs-number">0</span>, <span class="hljs-number">2</span>] | |
| ) | |
| weights = alpha * (<span class="hljs-number">1.0</span> + weights) | |
| <span class="hljs-comment"># Calculate weighted average</span> | |
| weighted_loss = (loss_per_sample * weights).mean() | |
| <span class="hljs-keyword">return</span> weighted_loss<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1gweezu">Înainte de a începe antrenamentul cu această nouă funcție de pierdere minunată, trebuie să pregătim câteva lucruri:</p> <ul data-svelte-h="svelte-w4uvxk"><li>Avem nevoie de dataloaders pentru a încărca datele în batch-uri.</li> <li>Trebuie să configurăm parametrii de scădere a weighturilor.</li> <li>Din când în când, dorim să evaluăm, astfel încât este logic să includem codul de evaluare într-o funcție.</li></ul> <p data-svelte-h="svelte-ham5e0">Să începem cu dataloaders. Trebuie doar să setăm formatul datasetului la <code>"torch"</code>, iar apoi îl putem trece la un <code>DataLoader</code> PyTorch cu dimensiunea corespunzătoare a batch-lui:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> torch.utils.data.dataloader <span class="hljs-keyword">import</span> DataLoader | |
| tokenized_datasets.set_format(<span class="hljs-string">"torch"</span>) | |
| train_dataloader = DataLoader(tokenized_datasets[<span class="hljs-string">"train"</span>], batch_size=<span class="hljs-number">32</span>, shuffle=<span class="hljs-literal">True</span>) | |
| eval_dataloader = DataLoader(tokenized_datasets[<span class="hljs-string">"valid"</span>], batch_size=<span class="hljs-number">32</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-114yi2l">În continuare, grupăm parametrii astfel încât optimizatorul să știe care dintre aceștia vor primi o scădere suplimentară a weighturilor. De obicei, toți termenii weighturilor bias și LayerNorm sunt scutiți de acest lucru; iată cum putem face acest lucru:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->weight_decay = <span class="hljs-number">0.1</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">get_grouped_params</span>(<span class="hljs-params">model, no_decay=[<span class="hljs-string">"bias"</span>, <span class="hljs-string">"LayerNorm.weight"</span>]</span>): | |
| params_with_wd, params_without_wd = [], [] | |
| <span class="hljs-keyword">for</span> n, p <span class="hljs-keyword">in</span> model.named_parameters(): | |
| <span class="hljs-keyword">if</span> <span class="hljs-built_in">any</span>(nd <span class="hljs-keyword">in</span> n <span class="hljs-keyword">for</span> nd <span class="hljs-keyword">in</span> no_decay): | |
| params_without_wd.append(p) | |
| <span class="hljs-keyword">else</span>: | |
| params_with_wd.append(p) | |
| <span class="hljs-keyword">return</span> [ | |
| {<span class="hljs-string">"params"</span>: params_with_wd, <span class="hljs-string">"weight_decay"</span>: weight_decay}, | |
| {<span class="hljs-string">"params"</span>: params_without_wd, <span class="hljs-string">"weight_decay"</span>: <span class="hljs-number">0.0</span>}, | |
| ]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1i6yszn">Deoarece dorim să evaluăm modelul în mod regulat pe setul de validare în timpul antrenării, trebuie să scriem o funcție și pentru acest lucru. Aceasta rulează pur și simplu prin dataloaderul de evaluare și adună toate pierderile în cadrul proceselor:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">evaluate</span>(): | |
| model.<span class="hljs-built_in">eval</span>() | |
| losses = [] | |
| <span class="hljs-keyword">for</span> step, batch <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(eval_dataloader): | |
| <span class="hljs-keyword">with</span> torch.no_grad(): | |
| outputs = model(batch[<span class="hljs-string">"input_ids"</span>], labels=batch[<span class="hljs-string">"input_ids"</span>]) | |
| losses.append(accelerator.gather(outputs.loss)) | |
| loss = torch.mean(torch.cat(losses)) | |
| <span class="hljs-keyword">try</span>: | |
| perplexity = torch.exp(loss) | |
| <span class="hljs-keyword">except</span> OverflowError: | |
| perplexity = <span class="hljs-built_in">float</span>(<span class="hljs-string">"inf"</span>) | |
| <span class="hljs-keyword">return</span> loss.item(), perplexity.item()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-15i7smm">Cu funcția <code>evaluate()</code> putem raporta pierderile și <a href="/course/chapter7/3">perplexitatea</a> la intervale regulate. În continuare, redefinim modelul nostru pentru a ne asigura că antrenăm din nou de la zero:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model = GPT2LMHeadModel(config)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-i04062">Apoi putem defini optimizatorul nostru, folosind funcția de mai devreme pentru a împărți parametrii pentru scăderea weighturilor:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> torch.optim <span class="hljs-keyword">import</span> AdamW | |
| optimizer = AdamW(get_grouped_params(model), lr=<span class="hljs-number">5e-4</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1olt95c">Acum să pregătim modelul, optimizatorul și dataloaderurile, astfel încât să putem începe antrenamentul:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator | |
| accelerator = Accelerator(fp16=<span class="hljs-literal">True</span>) | |
| model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare( | |
| model, optimizer, train_dataloader, eval_dataloader | |
| )<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-sg2lu8">🚨 Dacă antrenați pe un TPU, va trebui să mutați tot codul începând cu celula de mai sus într-o funcție de antrenare dedicată. Consultați <a href="/course/chapter3">Capitolul 3</a> pentru mai multe detalii.</p></div> <p data-svelte-h="svelte-1g8mr4u">Acum că am trimis <code>train_dataloader</code> la <code>accelerator.prepare()</code>, putem utiliza lungimea acestuia pentru a calcula numărul de pași de antrenare. Rețineți că ar trebui să facem acest lucru întotdeauna după ce pregătim dataloaderurile, deoarece această metodă îi va modifica lungimea. Utilizăm un program liniar clasic de la rata de învățare la 0:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> get_scheduler | |
| num_train_epochs = <span class="hljs-number">1</span> | |
| num_update_steps_per_epoch = <span class="hljs-built_in">len</span>(train_dataloader) | |
| num_training_steps = num_train_epochs * num_update_steps_per_epoch | |
| lr_scheduler = get_scheduler( | |
| name=<span class="hljs-string">"linear"</span>, | |
| optimizer=optimizer, | |
| num_warmup_steps=<span class="hljs-number">1_000</span>, | |
| num_training_steps=num_training_steps, | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1skur9m">În cele din urmă, pentru a trimite modelul nostru către Hub, va trebui să creăm un obiect <code>Repository</code> într-un folder de lucru. În primul rând, conectați-vă la Hugging Face Hub, dacă nu sunteți deja conectat. Vom determina numele repositoriului pornind de la ID-ul modelului pe care dorim să îl atribuim modelului nostru (nu ezitați să înlocuiți <code>repo_name</code> cu propria alegere; acesta trebuie doar să conțină numele vostru de utilizator, ceea ce face funcția <code>get_full_repo_name()</code>):</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> Repository, get_full_repo_name | |
| model_name = <span class="hljs-string">"codeparrot-ds-accelerate"</span> | |
| repo_name = get_full_repo_name(model_name) | |
| repo_name<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">'sgugger/codeparrot-ds-accelerate'</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-8vuhx6">Apoi putem clona acel repositoriu într-un folder local. Dacă există deja, acest folder local ar trebui să fie o clonă existentă a repositoriul cu care lucrăm:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->output_dir = <span class="hljs-string">"codeparrot-ds-accelerate"</span> | |
| repo = Repository(output_dir, clone_from=repo_name)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1wk5dqw">Acum putem încărca orice salvăm în <code>output_dir</code> prin apelarea metodei <code>repo.push_to_hub()</code>. Acest lucru ne va ajuta să încărcăm modelele intermediare la sfârșitul fiecărei epoci.</p> <p data-svelte-h="svelte-jm72ma">Înainte de antrenament, să efectuăm un test rapid pentru a vedea dacă funcția de evaluare funcționează corect:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->evaluate()<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->(<span class="hljs-number">10.934126853942871</span>, <span class="hljs-number">56057.14453125</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-pi990m">Acestea sunt valori foarte ridicate pentru pierdere și perplexitate, dar acest lucru nu este surprinzător, deoarece nu am antrenat încă modelul. Astfel, avem totul pregătit pentru a scrie partea principală a scriptului de antrenare: bucla de antrenare. În bucla de antrenare, iterăm peste dataloader și transmitem batch-urile către model. Cu logurile, putem apoi evalua funcția noastră de pierdere personalizată. Redimensionăm pierderea în funcție de numărul de etape de acumulare a gradientului pentru a nu crea pierderi mai mari atunci când agregăm mai multe etape. Înainte de a optimiza, comprimăm, de asemenea, gradienții pentru o mai bună convergență. În cele din urmă, la fiecare câțiva pași, evaluăm modelul pe setul de evaluare cu noua noastră funcție <code>evaluate()</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> tqdm.notebook <span class="hljs-keyword">import</span> tqdm | |
| gradient_accumulation_steps = <span class="hljs-number">8</span> | |
| eval_steps = <span class="hljs-number">5_000</span> | |
| model.train() | |
| completed_steps = <span class="hljs-number">0</span> | |
| <span class="hljs-keyword">for</span> epoch <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(num_train_epochs): | |
| <span class="hljs-keyword">for</span> step, batch <span class="hljs-keyword">in</span> tqdm( | |
| <span class="hljs-built_in">enumerate</span>(train_dataloader, start=<span class="hljs-number">1</span>), total=num_training_steps | |
| ): | |
| logits = model(batch[<span class="hljs-string">"input_ids"</span>]).logits | |
| loss = keytoken_weighted_loss(batch[<span class="hljs-string">"input_ids"</span>], logits, keytoken_ids) | |
| <span class="hljs-keyword">if</span> step % <span class="hljs-number">100</span> == <span class="hljs-number">0</span>: | |
| accelerator.<span class="hljs-built_in">print</span>( | |
| { | |
| <span class="hljs-string">"samples"</span>: step * samples_per_step, | |
| <span class="hljs-string">"steps"</span>: completed_steps, | |
| <span class="hljs-string">"loss/train"</span>: loss.item() * gradient_accumulation_steps, | |
| } | |
| ) | |
| loss = loss / gradient_accumulation_steps | |
| accelerator.backward(loss) | |
| <span class="hljs-keyword">if</span> step % gradient_accumulation_steps == <span class="hljs-number">0</span>: | |
| accelerator.clip_grad_norm_(model.parameters(), <span class="hljs-number">1.0</span>) | |
| optimizer.step() | |
| lr_scheduler.step() | |
| optimizer.zero_grad() | |
| completed_steps += <span class="hljs-number">1</span> | |
| <span class="hljs-keyword">if</span> (step % (eval_steps * gradient_accumulation_steps)) == <span class="hljs-number">0</span>: | |
| eval_loss, perplexity = evaluate() | |
| accelerator.<span class="hljs-built_in">print</span>({<span class="hljs-string">"loss/eval"</span>: eval_loss, <span class="hljs-string">"perplexity"</span>: perplexity}) | |
| model.train() | |
| accelerator.wait_for_everyone() | |
| unwrapped_model = accelerator.unwrap_model(model) | |
| unwrapped_model.save_pretrained(output_dir, save_function=accelerator.save) | |
| <span class="hljs-keyword">if</span> accelerator.is_main_process: | |
| tokenizer.save_pretrained(output_dir) | |
| repo.push_to_hub( | |
| commit_message=<span class="hljs-string">f"Training in progress step <span class="hljs-subst">{step}</span>"</span>, blocking=<span class="hljs-literal">False</span> | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-zhz1o9">Și asta e tot - acum aveți propria buclă de antrenare personalizată pentru modele de limbaj cauzal, cum ar fi GPT-2, pe care o puteți personaliza în continuare în funcție de nevoile voastre.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1nf6k9s">✏️ <strong>încercați!</strong> Fie vă creați propria funcție de pierdere personalizată, adaptată la cazul vostru de utilizare, fie adăugați un alt pas personalizat în bucla de antrenare.</p></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-uxqq5y">✏️ <strong>încercați!</strong> Atunci când efectuați experimente de antrenare de lungă durată, este o idee bună să înregistrați parametrii importanți utilizând instrumente precum TensorBoard sau Weights & Biases. Adăugați o logare adecvată la bucla de antrenare, astfel încât să puteți verifica întotdeauna cum decurge antrenarea.</p></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/course/blob/main/chapters/rum/chapter7/6.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span data-svelte-h="svelte-x0xyl0">></span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_1ftlxhy = { | |
| assets: "/docs/course/pr_1069/rum", | |
| base: "/docs/course/pr_1069/rum", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/course/pr_1069/rum/_app/immutable/entry/start.1de7c3d2.js"), | |
| import("/docs/course/pr_1069/rum/_app/immutable/entry/app.1f82014c.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 81], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 137 kB
- Xet hash:
- e7904d8f8a8f0cc2d4a2fe10bc9dde9708104447960af07c085d45566cf9c82d
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.