Buckets:

rtrm's picture
download
raw
37 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Models&quot;,&quot;local&quot;:&quot;models&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Creare un trasformatore&quot;,&quot;local&quot;:&quot;creare-un-trasformatore&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Diversi metodi di caricamento&quot;,&quot;local&quot;:&quot;diversi-metodi-di-caricamento&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Metodi di salvataggio&quot;,&quot;local&quot;:&quot;metodi-di-salvataggio&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Using a Transformer model for inference&quot;,&quot;local&quot;:&quot;using-a-transformer-model-for-inference&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Uso dei tensori come input del modello&quot;,&quot;local&quot;:&quot;uso-dei-tensori-come-input-del-modello&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/course/pr_1069/it/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/course/pr_1069/it/_app/immutable/entry/start.693d748d.js">
<link rel="modulepreload" href="/docs/course/pr_1069/it/_app/immutable/chunks/scheduler.37c15a92.js">
<link rel="modulepreload" href="/docs/course/pr_1069/it/_app/immutable/chunks/singletons.60b4c7a2.js">
<link rel="modulepreload" href="/docs/course/pr_1069/it/_app/immutable/chunks/index.18351ede.js">
<link rel="modulepreload" href="/docs/course/pr_1069/it/_app/immutable/chunks/paths.43b6516c.js">
<link rel="modulepreload" href="/docs/course/pr_1069/it/_app/immutable/entry/app.e9cfd099.js">
<link rel="modulepreload" href="/docs/course/pr_1069/it/_app/immutable/chunks/index.2bf4358c.js">
<link rel="modulepreload" href="/docs/course/pr_1069/it/_app/immutable/nodes/0.bb8a536c.js">
<link rel="modulepreload" href="/docs/course/pr_1069/it/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/course/pr_1069/it/_app/immutable/nodes/15.0f8e2575.js">
<link rel="modulepreload" href="/docs/course/pr_1069/it/_app/immutable/chunks/Youtube.1e50a667.js">
<link rel="modulepreload" href="/docs/course/pr_1069/it/_app/immutable/chunks/CodeBlock.4e987730.js">
<link rel="modulepreload" href="/docs/course/pr_1069/it/_app/immutable/chunks/DocNotebookDropdown.efc1fb7c.js">
<link rel="modulepreload" href="/docs/course/pr_1069/it/_app/immutable/chunks/FrameworkSwitchCourse.8d4d4ab6.js">
<link rel="modulepreload" href="/docs/course/pr_1069/it/_app/immutable/chunks/getInferenceSnippets.24b50994.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Models&quot;,&quot;local&quot;:&quot;models&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Creare un trasformatore&quot;,&quot;local&quot;:&quot;creare-un-trasformatore&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Diversi metodi di caricamento&quot;,&quot;local&quot;:&quot;diversi-metodi-di-caricamento&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Metodi di salvataggio&quot;,&quot;local&quot;:&quot;metodi-di-salvataggio&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Using a Transformer model for inference&quot;,&quot;local&quot;:&quot;using-a-transformer-model-for-inference&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Uso dei tensori come input del modello&quot;,&quot;local&quot;:&quot;uso-dei-tensori-come-input-del-modello&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="bg-white leading-none border border-gray-100 rounded-lg flex p-0.5 w-56 text-sm mb-4"><a class="flex justify-center flex-1 py-1.5 px-2.5 focus:outline-none !no-underline rounded-l bg-red-50 dark:bg-transparent text-red-600" href="?fw=pt"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><defs><clipPath id="a"><rect x="3.05" y="0.5" width="25.73" height="31" fill="none"></rect></clipPath></defs><g clip-path="url(#a)"><path d="M24.94,9.51a12.81,12.81,0,0,1,0,18.16,12.68,12.68,0,0,1-18,0,12.81,12.81,0,0,1,0-18.16l9-9V5l-.84.83-6,6a9.58,9.58,0,1,0,13.55,0ZM20.44,9a1.68,1.68,0,1,1,1.67-1.67A1.68,1.68,0,0,1,20.44,9Z" fill="#ee4c2c"></path></g></svg> Pytorch </a><a class="flex justify-center flex-1 py-1.5 px-2.5 focus:outline-none !no-underline rounded-r text-gray-500 filter grayscale" href="?fw=tf"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="0.94em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 274"><path d="M145.726 42.065v42.07l72.861 42.07v-42.07l-72.86-42.07zM0 84.135v42.07l36.43 21.03V105.17L0 84.135zm109.291 21.035l-36.43 21.034v126.2l36.43 21.035v-84.135l36.435 21.035v-42.07l-36.435-21.034V105.17z" fill="#E55B2D"></path><path d="M145.726 42.065L36.43 105.17v42.065l72.861-42.065v42.065l36.435-21.03v-84.14zM255.022 63.1l-36.435 21.035v42.07l36.435-21.035V63.1zm-72.865 84.135l-36.43 21.035v42.07l36.43-21.036v-42.07zm-36.43 63.104l-36.436-21.035v84.135l36.435-21.035V210.34z" fill="#ED8E24"></path><path d="M145.726 0L0 84.135l36.43 21.035l109.296-63.105l72.861 42.07L255.022 63.1L145.726 0zm0 126.204l-36.435 21.03l36.435 21.036l36.43-21.035l-36.43-21.03z" fill="#F8BF3C"></path></svg> TensorFlow </a></div> <h1 class="relative group"><a id="models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Models</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0"> <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section3_pt.ipynb" target="_blank"><img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"></a> <a href="https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section3_pt.ipynb" target="_blank"><img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"></a></div> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/AhChOFRegn4" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-ntfp38">In questa sezione vedremo da vicino come creare e usare un modello. Utilizzeremo la classe <code>AutoModel</code>, utile quando si vuole istanziare qualsiasi modello da un checkpoint.</p> <p data-svelte-h="svelte-7jknq">La classe <code>AutoModel</code> e tutti i suoi derivati sono in realtà semplici involucri dell’ampia varietà di modelli disponibili nella libreria. Si tratta di un involucro intelligente, in quanto è in grado di indovinare automaticamente l’architettura del modello appropriata per il checkpoint e successivamente di istanziare un modello con questa architettura.</p> <p data-svelte-h="svelte-19yjkd2">Tuttavia, se si conosce il tipo di modello che si vuole utilizzare, si può usare direttamente la classe che ne definisce l’architettura. Vediamo come funziona con un modello BERT.</p> <h2 class="relative group"><a id="creare-un-trasformatore" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#creare-un-trasformatore"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Creare un trasformatore</span></h2> <p data-svelte-h="svelte-sep5xb">La prima cosa da fare per inizializzare un modello BERT è caricare un oggetto di configurazione:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BertConfig, BertModel
<span class="hljs-comment"># Creazione della configurazione</span>
config = BertConfig()
<span class="hljs-comment"># Creare il modello dalla configurazione</span>
model = BertModel(config)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-mn48kp">La configurazione contiene molti attributi che vengono utilizzati per costruire il modello:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">print</span>(config)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->BertConfig {
[...]
<span class="hljs-string">&quot;hidden_size&quot;</span>: <span class="hljs-number">768</span>,
<span class="hljs-string">&quot;intermediate_size&quot;</span>: <span class="hljs-number">3072</span>,
<span class="hljs-string">&quot;max_position_embeddings&quot;</span>: <span class="hljs-number">512</span>,
<span class="hljs-string">&quot;num_attention_heads&quot;</span>: <span class="hljs-number">12</span>,
<span class="hljs-string">&quot;num_hidden_layers&quot;</span>: <span class="hljs-number">12</span>,
[...]
}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-wxezk7">Anche se non si è ancora visto cosa fanno tutti questi attributi, se ne dovrebbero riconoscere alcuni: l’attributo <code>hidden_size</code> definisce la dimensione del vettore <code>hidden_states</code>, e l’attributo <code>num_hidden_layers</code> definisce il numero di livelli del modello Transformer.</p> <h3 class="relative group"><a id="diversi-metodi-di-caricamento" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#diversi-metodi-di-caricamento"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Diversi metodi di caricamento</span></h3> <p data-svelte-h="svelte-17l7gl5">La creazione di un modello dalla configurazione predefinita lo inizializza con valori casuali:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BertConfig, BertModel
config = BertConfig()
model = BertModel(config)
<span class="hljs-comment"># Il modello è inizializzato in modo casuale!</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-mgikte">Il modello può essere utilizzato in questo stato, ma produrrà risultati incomprensibili; è necessario addestrarlo prima.</p> <p data-svelte-h="svelte-r16bee">Potremmo addestrare il modello da zero sul compito da svolgere, ma come si è visto in <a href="/course/chapter1">Capitolo 1</a>, questo richiederebbe molto tempo e molti dati, oltre ad avere un impatto ambientale non trascurabile. Per evitare sforzi inutili, è indispensabile poter condividere e riutilizzare modelli già addestrati.</p> <p data-svelte-h="svelte-yusqd1">Caricare un modello Transformer già addestrato è semplice: lo si può fare usando il metodo <code>from_pretrained()</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BertModel
model = BertModel.from_pretrained(<span class="hljs-string">&quot;bert-base-cased&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-39bzp6">Come abbiamo visto in precedenza, possiamo sostituire <code>BertModel</code> con la classe equivalente <code>AutoModel</code>. Lo faremo d’ora in poi, perché in questo modo si ottiene un codice cosiddetto “checkpoint-agnostic”; se il codice funziona per un checkpoint, dovrebbe funzionare senza problemi anche con un altro. Questo vale anche se l’architettura è diversa, purché il checkpoint sia stato addestrato per un compito simile (per esempio, un compito di sentiment analysis).</p> <p data-svelte-h="svelte-19oiivu">Nell’esempio di codice precedente non abbiamo usato <code>BertConfig</code> e abbiamo invece caricato un modello pre-addestrato tramite l’identificatore <code>bert-base-cased</code>. Si tratta di un checkpoint che è stato addestrato dagli stessi autori di BERT; si possono trovare maggiori dettagli su di esso nella sua <a href="https://huggingface.co/bert-base-cased" rel="nofollow">scheda modello</a>.</p> <p data-svelte-h="svelte-kymetl">Questo modello è ora inizializzato con tutti i pesi del checkpoint. Può essere utilizzato direttamente per effettuare inferenza sui compiti su cui è stato addestrato e può anche essere messo adattato ad un nuovo compito, tramite il fine tuning. Allenandosi con i pesi pre-addestrati piuttosto che partendo da zero, si possono ottenere rapidamente buoni risultati.</p> <p data-svelte-h="svelte-1ra7or1">I pesi sono stati scaricati e messi in cache (in modo che le future chiamate al metodo <code>from_pretrained()</code> non li scarichino di nuovo) nella cartella della cache, che per impostazione predefinita è <em>~/.cache/huggingface/transformers</em>. È possibile personalizzare la cartella della cache impostando la variabile d’ambiente <code>HF_HOME</code>.</p> <p data-svelte-h="svelte-1qx7292">L’identificatore usato per caricare il modello può essere l’identificatore di qualsiasi modello presente nel Model Hub, purché sia compatibile con l’architettura del BERT. L’elenco completo dei checkpoint BERT disponibili è disponibile <a href="https://huggingface.co/models?filter=bert" rel="nofollow">qui</a>.</p> <h3 class="relative group"><a id="metodi-di-salvataggio" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#metodi-di-salvataggio"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Metodi di salvataggio</span></h3> <p data-svelte-h="svelte-v1sy0v">Saving a model is as easy as loading one — we use the <code>save_pretrained()</code> method, which is analogous to the <code>from_pretrained()</code> method:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model.save_pretrained(<span class="hljs-string">&quot;directory_on_my_computer&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1y6yq67">In questo modo si salvano due file sul disco:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->ls <span class="hljs-keyword">directory_on_my_computer
</span>
<span class="hljs-built_in">config</span>.<span class="hljs-keyword">json </span>model.safetensors<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-re0uw2">Se si dà un’occhiata al file <em>config.json</em>, si riconoscono gli attributi necessari per costruire l’architettura del modello. Questo file contiene anche alcuni metadati, come l’origine del checkpoint e la versione di 🤗 Transformers utilizzata al momento dell’ultimo salvataggio del checkpoint.</p> <p data-svelte-h="svelte-13ve7mc">Il file <em>model.safetensors</em> è noto come <em>state dictionary</em>; contiene tutti i pesi del modello. I due file vanno di pari passo: la configurazione è necessaria per conoscere l’architettura del modello, mentre i pesi del modello sono i suoi parametri.</p> <h2 class="relative group"><a id="using-a-transformer-model-for-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-a-transformer-model-for-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using a Transformer model for inference</span></h2> <p data-svelte-h="svelte-78p3r8">Ora che si sa come caricare e salvare un modello, proviamo a usarlo per fare delle previsioni. I modelli di trasformatori possono elaborare solo numeri - numeri generati dal tokenizer. Ma prima di parlare dei tokenizer, analizziamo quali sono gli input accettati dal modello.</p> <p data-svelte-h="svelte-4vle6d">I tokenizer possono occuparsi di effettuare il casting degli input nei tensori del framework appropriato, ma per aiutarti a capire cosa sta succedendo, daremo una rapida occhiata a ciò che deve essere fatto prima di inviare gli input al modello.</p> <p data-svelte-h="svelte-amlzbw">Supponiamo di avere un paio di sequenze:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->sequences = [<span class="hljs-string">&quot;Hello!&quot;</span>, <span class="hljs-string">&quot;Cool.&quot;</span>, <span class="hljs-string">&quot;Nice!&quot;</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1635jd">The tokenizer converts these to vocabulary indices which are typically called <em>input IDs</em>. Each sequence is now a list of numbers! The resulting output is:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->encoded_sequences = [
[<span class="hljs-number">101</span>, <span class="hljs-number">7592</span>, <span class="hljs-number">999</span>, <span class="hljs-number">102</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">4658</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">3835</span>, <span class="hljs-number">999</span>, <span class="hljs-number">102</span>],
]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1dycvgp">Si tratta di una lista di sequenze codificate: una lista di liste. I tensori accettano solo forme rettangolari (si pensi alle matrici). Questo “array” è già di forma rettangolare, quindi convertirlo in un tensore è facile:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
model_inputs = torch.tensor(encoded_sequences)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="uso-dei-tensori-come-input-del-modello" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#uso-dei-tensori-come-input-del-modello"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Uso dei tensori come input del modello</span></h3> <p data-svelte-h="svelte-1bsyn9">Utilizzare i tensori con il modello è estremamente semplice: basta chiamare il modello con gli input:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->output = model(model_inputs)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1u8wktc">Il modello accetta molti argomenti diversi, ma solo gli ID degli ingressi sono necessari. Spiegheremo in seguito cosa fanno gli altri argomenti e quando sono necessari, ma prima dobbiamo dare un’occhiata più da vicino ai tokenizer che costruiscono gli input che un modello Transformer può comprendere.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/course/blob/main/chapters/it/chapter2/3.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_u8ez91 = {
assets: "/docs/course/pr_1069/it",
base: "/docs/course/pr_1069/it",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/course/pr_1069/it/_app/immutable/entry/start.693d748d.js"),
import("/docs/course/pr_1069/it/_app/immutable/entry/app.e9cfd099.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 15],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
37 kB
·
Xet hash:
c00cac916b3dfddb1bae5bfafffd6725154379735e3239992010448496f7623a

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.