Buckets:

hf-doc-build
/

doc-dev

hf-doc-build/doc-dev / transformers /pr_33913 /de /llm_tutorial.html

46.2 kB

	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Generation with LLMs","local":"generation-with-llms","sections":[{"title":"Text generieren","local":"text-generieren","sections":[],"depth":2},{"title":"Häufige Fallstricke","local":"häufige-fallstricke","sections":[{"title":"Generierte Ausgabe ist zu kurz/lang","local":"generierte-ausgabe-ist-zu-kurzlang","sections":[],"depth":3},{"title":"Falscher Generierungsmodus","local":"falscher-generierungsmodus","sections":[],"depth":3},{"title":"Falsche Auffüllseite","local":"falsche-auffüllseite","sections":[],"depth":3}],"depth":2},{"title":"Weitere Ressourcen","local":"weitere-ressourcen","sections":[{"title":"Fortgeschrittene Nutzung generieren","local":"fortgeschrittene-nutzung-generieren","sections":[],"depth":3},{"title":"LLM-Ranglisten","local":"llm-ranglisten","sections":[],"depth":3},{"title":"Latenz und Durchsatz","local":"latenz-und-durchsatz","sections":[],"depth":3},{"title":"Verwandte Bibliotheken","local":"verwandte-bibliotheken","sections":[],"depth":3}],"depth":2}],"depth":1}">
	<link href="/docs/transformers/pr_33913/de/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/de/_app/immutable/entry/start.4cb54951.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/de/_app/immutable/chunks/scheduler.987d3921.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/de/_app/immutable/chunks/singletons.cc4df6a9.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/de/_app/immutable/chunks/index.42b1abb7.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/de/_app/immutable/chunks/paths.7a1265c0.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/de/_app/immutable/entry/app.8a9a40b8.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/de/_app/immutable/chunks/index.c8b1fed4.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/de/_app/immutable/nodes/0.509a6df1.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/de/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/de/_app/immutable/nodes/9.cd11e197.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/de/_app/immutable/chunks/Tip.6bc1e794.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/de/_app/immutable/chunks/CodeBlock.18094d58.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/de/_app/immutable/chunks/DocNotebookDropdown.8be6c56e.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/de/_app/immutable/chunks/globals.7f7f1b26.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/de/_app/immutable/chunks/EditOnGithub.a3fde557.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Generation with LLMs","local":"generation-with-llms","sections":[{"title":"Text generieren","local":"text-generieren","sections":[],"depth":2},{"title":"Häufige Fallstricke","local":"häufige-fallstricke","sections":[{"title":"Generierte Ausgabe ist zu kurz/lang","local":"generierte-ausgabe-ist-zu-kurzlang","sections":[],"depth":3},{"title":"Falscher Generierungsmodus","local":"falscher-generierungsmodus","sections":[],"depth":3},{"title":"Falsche Auffüllseite","local":"falsche-auffüllseite","sections":[],"depth":3}],"depth":2},{"title":"Weitere Ressourcen","local":"weitere-ressourcen","sections":[{"title":"Fortgeschrittene Nutzung generieren","local":"fortgeschrittene-nutzung-generieren","sections":[],"depth":3},{"title":"LLM-Ranglisten","local":"llm-ranglisten","sections":[],"depth":3},{"title":"Latenz und Durchsatz","local":"latenz-und-durchsatz","sections":[],"depth":3},{"title":"Verwandte Bibliotheken","local":"verwandte-bibliotheken","sections":[],"depth":3}],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="generation-with-llms" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#generation-with-llms"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Generation with LLMs</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <p data-svelte-h="svelte-140yavd">LLMs (Large Language Models) sind die Schlüsselkomponente bei der Texterstellung. Kurz gesagt, bestehen sie aus großen, vortrainierten Transformationsmodellen, die darauf trainiert sind, das nächste Wort (oder genauer gesagt Token) aus einem Eingabetext vorherzusagen. Da sie jeweils ein Token vorhersagen, müssen Sie etwas Aufwändigeres tun, um neue Sätze zu generieren, als nur das Modell aufzurufen - Sie müssen eine autoregressive Generierung durchführen.</p> <p data-svelte-h="svelte-1nh5cju">Die autoregressive Generierung ist ein Verfahren zur Inferenzzeit, bei dem ein Modell mit seinen eigenen generierten Ausgaben iterativ aufgerufen wird, wenn einige anfängliche Eingaben vorliegen. In 🤗 Transformers wird dies von der Methode <code>generate()</code> übernommen, die allen Modellen mit generativen Fähigkeiten zur Verfügung steht.</p> <p data-svelte-h="svelte-ipf9gg">Dieses Tutorial zeigt Ihnen, wie Sie:</p> <ul data-svelte-h="svelte-imc6q0"><li>Text mit einem LLM generieren</li> <li>Vermeiden Sie häufige Fallstricke</li> <li>Nächste Schritte, damit Sie das Beste aus Ihrem LLM herausholen können</li></ul> <p data-svelte-h="svelte-28fang">Bevor Sie beginnen, stellen Sie sicher, dass Sie alle erforderlichen Bibliotheken installiert haben:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install transformers bitsandbytes>=0.39.0 -q<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="text-generieren" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#text-generieren"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Text generieren</span></h2> <p data-svelte-h="svelte-zi1jgg">Ein Sprachmodell, das für <a href="tasks/language_modeling">causal language modeling</a> trainiert wurde, nimmt eine Folge von Text-Token als Eingabe und gibt die Wahrscheinlichkeitsverteilung für das nächste Token zurück.</p> <figure class="image table text-center m-0 w-full" data-svelte-h="svelte-hjgddv"><video style="max-width: 90%; margin: auto;" autoplay="" loop="" muted="" playsinline="" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/assisted-generation/gif_1_1080p.mov"></video> <figcaption>"Forward pass of an LLM"</figcaption></figure> <p data-svelte-h="svelte-1ilwa1m">Ein wichtiger Aspekt der autoregressiven Generierung mit LLMs ist die Auswahl des nächsten Tokens aus dieser Wahrscheinlichkeitsverteilung. In diesem Schritt ist alles möglich, solange Sie am Ende ein Token für die nächste Iteration haben. Das heißt, es kann so einfach sein wie die Auswahl des wahrscheinlichsten Tokens aus der Wahrscheinlichkeitsverteilung oder so komplex wie die Anwendung von einem Dutzend Transformationen vor der Stichprobenziehung aus der resultierenden Verteilung.</p> <figure class="image table text-center m-0 w-full" data-svelte-h="svelte-4pjbi0"><video style="max-width: 90%; margin: auto;" autoplay="" loop="" muted="" playsinline="" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/assisted-generation/gif_2_1080p.mov"></video> <figcaption>"Die autoregressive Generierung wählt iterativ das nächste Token aus einer Wahrscheinlichkeitsverteilung aus, um Text zu erzeugen"</figcaption></figure> <p data-svelte-h="svelte-1j4vblf">Der oben dargestellte Prozess wird iterativ wiederholt, bis eine bestimmte Abbruchbedingung erreicht ist. Im Idealfall wird die Abbruchbedingung vom Modell vorgegeben, das lernen sollte, wann es ein Ende-der-Sequenz-Token (EOS) ausgeben muss. Ist dies nicht der Fall, stoppt die Generierung, wenn eine vordefinierte Maximallänge erreicht ist.</p> <p data-svelte-h="svelte-5vhx9n">Damit sich Ihr Modell so verhält, wie Sie es für Ihre Aufgabe erwarten, müssen Sie den Schritt der Token-Auswahl und die Abbruchbedingung richtig einstellen. Aus diesem Grund haben wir zu jedem Modell eine <code>GenerationConfig</code>-Datei, die eine gute generative Standardparametrisierung enthält und zusammen mit Ihrem Modell geladen wird.</p> <p data-svelte-h="svelte-wo8rtu">Lassen Sie uns über Code sprechen!</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-ea0vwr">Wenn Sie an der grundlegenden Verwendung von LLMs interessiert sind, ist unsere High-Level-Schnittstelle <a href="pipeline_tutorial"><code>Pipeline</code></a> ein guter Ausgangspunkt. LLMs erfordern jedoch oft fortgeschrittene Funktionen wie Quantisierung und Feinsteuerung des Token-Auswahlschritts, was am besten über <code>generate()</code> erfolgt. Die autoregressive Generierung mit LLMs ist ebenfalls ressourcenintensiv und sollte für einen angemessenen Durchsatz auf einer GPU ausgeführt werden.</p></div> <p data-svelte-h="svelte-16f5yf">Zunächst müssen Sie das Modell laden.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM

	<span class="hljs-meta">>>> </span>model = AutoModelForCausalLM.from_pretrained(
	<span class="hljs-meta">... </span> <span class="hljs-string">"openlm-research/open_llama_7b"</span>, device_map=<span class="hljs-string">"auto"</span>, load_in_4bit=<span class="hljs-literal">True</span>
	<span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1qd47t5">Sie werden zwei Flags in dem Aufruf <code>from_pretrained</code> bemerken:</p> <ul data-svelte-h="svelte-dbmx59"><li><code>device_map</code> stellt sicher, dass das Modell auf Ihre GPU(s) übertragen wird</li> <li><code>load_in_4bit</code> wendet <a href="main_classes/quantization">dynamische 4-Bit-Quantisierung</a> an, um die Ressourcenanforderungen massiv zu reduzieren</li></ul> <p data-svelte-h="svelte-7wdmxs">Es gibt noch andere Möglichkeiten, ein Modell zu initialisieren, aber dies ist eine gute Grundlage, um mit einem LLM zu beginnen.</p> <p data-svelte-h="svelte-1wuri71">Als nächstes müssen Sie Ihre Texteingabe mit einem <a href="tokenizer_summary">tokenizer</a> vorverarbeiten.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer

	<span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"openlm-research/open_llama_7b"</span>)
	<span class="hljs-meta">>>> </span>model_inputs = tokenizer([<span class="hljs-string">"A list of colors: red, blue"</span>], return_tensors=<span class="hljs-string">"pt"</span>).to(<span class="hljs-string">"cuda"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-9ebqu0">Die Variable <code>model_inputs</code> enthält die tokenisierte Texteingabe sowie die Aufmerksamkeitsmaske. Obwohl <code>generate()</code> sein Bestes tut, um die Aufmerksamkeitsmaske abzuleiten, wenn sie nicht übergeben wird, empfehlen wir, sie für optimale Ergebnisse wann immer möglich zu übergeben.</p> <p data-svelte-h="svelte-pk2vcp">Rufen Sie schließlich die Methode <code>generate()</code> auf, um die generierten Token zurückzugeben, die vor dem Drucken in Text umgewandelt werden sollten.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>generated_ids = model.generate(**model_inputs)
	<span class="hljs-meta">>>> </span>tokenizer.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-string">'A list of colors: red, blue, green, yellow, black, white, and brown'</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-5a0hj7">Und das war’s! Mit ein paar Zeilen Code können Sie sich die Macht eines LLM zunutze machen.</p> <h2 class="relative group"><a id="häufige-fallstricke" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#häufige-fallstricke"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Häufige Fallstricke</span></h2> <p data-svelte-h="svelte-e1t2cj">Es gibt viele <a href="generation_strategies">Generierungsstrategien</a>, und manchmal sind die Standardwerte für Ihren Anwendungsfall vielleicht nicht geeignet. Wenn Ihre Ausgaben nicht mit dem übereinstimmen, was Sie erwarten, haben wir eine Liste der häufigsten Fallstricke erstellt und wie Sie diese vermeiden können.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer

	<span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"openlm-research/open_llama_7b"</span>)
	<span class="hljs-meta">>>> </span>tokenizer.pad_token = tokenizer.eos_token <span class="hljs-comment"># Llama has no pad token by default</span>
	<span class="hljs-meta">>>> </span>model = AutoModelForCausalLM.from_pretrained(
	<span class="hljs-meta">... </span> <span class="hljs-string">"openlm-research/open_llama_7b"</span>, device_map=<span class="hljs-string">"auto"</span>, load_in_4bit=<span class="hljs-literal">True</span>
	<span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="generierte-ausgabe-ist-zu-kurzlang" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#generierte-ausgabe-ist-zu-kurzlang"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Generierte Ausgabe ist zu kurz/lang</span></h3> <p data-svelte-h="svelte-ps2fr5">Wenn in der Datei <code>GenerationConfig</code> nichts angegeben ist, gibt <code>generate</code> standardmäßig bis zu 20 Token zurück. Wir empfehlen dringend, <code>max_new_tokens</code> in Ihrem <code>generate</code>-Aufruf manuell zu setzen, um die maximale Anzahl neuer Token zu kontrollieren, die zurückgegeben werden können. Beachten Sie, dass LLMs (genauer gesagt, <a href="https://huggingface.co/learn/nlp-course/chapter1/6?fw=pt" rel="nofollow">decoder-only models</a>) auch die Eingabeaufforderung als Teil der Ausgabe zurückgeben.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>model_inputs = tokenizer([<span class="hljs-string">"A sequence of numbers: 1, 2"</span>], return_tensors=<span class="hljs-string">"pt"</span>).to(<span class="hljs-string">"cuda"</span>)

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># By default, the output will contain up to 20 tokens</span>
	<span class="hljs-meta">>>> </span>generated_ids = model.generate(**model_inputs)
	<span class="hljs-meta">>>> </span>tokenizer.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-string">'A sequence of numbers: 1, 2, 3, 4, 5'</span>

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># Setting `max_new_tokens` allows you to control the maximum length</span>
	<span class="hljs-meta">>>> </span>generated_ids = model.generate(**model_inputs, max_new_tokens=<span class="hljs-number">50</span>)
	<span class="hljs-meta">>>> </span>tokenizer.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-string">'A sequence of numbers: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,'</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="falscher-generierungsmodus" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#falscher-generierungsmodus"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Falscher Generierungsmodus</span></h3> <p data-svelte-h="svelte-niimxq">Standardmäßig und sofern nicht in der Datei <code>GenerationConfig</code> angegeben, wählt <code>generate</code> bei jeder Iteration das wahrscheinlichste Token aus (gierige Dekodierung). Je nach Aufgabe kann dies unerwünscht sein; kreative Aufgaben wie Chatbots oder das Schreiben eines Aufsatzes profitieren vom Sampling. Andererseits profitieren Aufgaben, bei denen es auf die Eingabe ankommt, wie z.B. Audiotranskription oder Übersetzung, von der gierigen Dekodierung. Aktivieren Sie das Sampling mit <code>do_sample=True</code>. Mehr zu diesem Thema erfahren Sie in diesem <a href="https://huggingface.co/blog/how-to-generate" rel="nofollow">Blogbeitrag</a>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-comment"># Set seed or reproducibility -- you don't need this unless you want full reproducibility</span>
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> set_seed
	<span class="hljs-meta">>>> </span>set_seed(<span class="hljs-number">0</span>)

	<span class="hljs-meta">>>> </span>model_inputs = tokenizer([<span class="hljs-string">"I am a cat."</span>], return_tensors=<span class="hljs-string">"pt"</span>).to(<span class="hljs-string">"cuda"</span>)

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># LLM + greedy decoding = repetitive, boring output</span>
	<span class="hljs-meta">>>> </span>generated_ids = model.generate(**model_inputs)
	<span class="hljs-meta">>>> </span>tokenizer.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-string">'I am a cat. I am a cat. I am a cat. I am a cat'</span>

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># With sampling, the output becomes more creative!</span>
	<span class="hljs-meta">>>> </span>generated_ids = model.generate(**model_inputs, do_sample=<span class="hljs-literal">True</span>)
	<span class="hljs-meta">>>> </span>tokenizer.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-string">'I am a cat.\nI just need to be. I am always.\nEvery time'</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="falsche-auffüllseite" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#falsche-auffüllseite"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Falsche Auffüllseite</span></h3> <p data-svelte-h="svelte-1epu98s">LLMs sind <a href="https://huggingface.co/learn/nlp-course/chapter1/6?fw=pt" rel="nofollow">decoder-only</a>-Architekturen, d.h. sie iterieren weiter über Ihre Eingabeaufforderung. Wenn Ihre Eingaben nicht die gleiche Länge haben, müssen sie aufgefüllt werden. Da LLMs nicht darauf trainiert sind, mit aufgefüllten Token fortzufahren, muss Ihre Eingabe links aufgefüllt werden. Vergessen Sie auch nicht, die Aufmerksamkeitsmaske an generate zu übergeben!</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-comment"># The tokenizer initialized above has right-padding active by default: the 1st sequence,</span>
	<span class="hljs-meta">>>> </span><span class="hljs-comment"># which is shorter, has padding on the right side. Generation fails.</span>
	<span class="hljs-meta">>>> </span>model_inputs = tokenizer(
	<span class="hljs-meta">... </span> [<span class="hljs-string">"1, 2, 3"</span>, <span class="hljs-string">"A, B, C, D, E"</span>], padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>
	<span class="hljs-meta">... </span>).to(<span class="hljs-string">"cuda"</span>)
	<span class="hljs-meta">>>> </span>generated_ids = model.generate(**model_inputs)
	<span class="hljs-meta">>>> </span>tokenizer.batch_decode(generated_ids[<span class="hljs-number">0</span>], skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-string">''</span>

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># With left-padding, it works as expected!</span>
	<span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"openlm-research/open_llama_7b"</span>, padding_side=<span class="hljs-string">"left"</span>)
	<span class="hljs-meta">>>> </span>tokenizer.pad_token = tokenizer.eos_token <span class="hljs-comment"># Llama has no pad token by default</span>
	<span class="hljs-meta">>>> </span>model_inputs = tokenizer(
	<span class="hljs-meta">... </span> [<span class="hljs-string">"1, 2, 3"</span>, <span class="hljs-string">"A, B, C, D, E"</span>], padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>
	<span class="hljs-meta">... </span>).to(<span class="hljs-string">"cuda"</span>)
	<span class="hljs-meta">>>> </span>generated_ids = model.generate(**model_inputs)
	<span class="hljs-meta">>>> </span>tokenizer.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-string">'1, 2, 3, 4, 5, 6,'</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="weitere-ressourcen" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#weitere-ressourcen"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Weitere Ressourcen</span></h2> <p data-svelte-h="svelte-g7p5pv">Während der Prozess der autoregressiven Generierung relativ einfach ist, kann die optimale Nutzung Ihres LLM ein schwieriges Unterfangen sein, da es viele bewegliche Teile gibt. Für Ihre nächsten Schritte, die Ihnen helfen, tiefer in die LLM-Nutzung und das Verständnis einzutauchen:</p> <h3 class="relative group"><a id="fortgeschrittene-nutzung-generieren" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#fortgeschrittene-nutzung-generieren"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Fortgeschrittene Nutzung generieren</span></h3> <ol data-svelte-h="svelte-rbjkud"><li><a href="generation_strategies">Leitfaden</a> zur Steuerung verschiedener Generierungsmethoden, zur Einrichtung der Generierungskonfigurationsdatei und zum Streaming der Ausgabe;</li> <li>API-Referenz zu <code>GenerationConfig</code>, <code>generate()</code> und <a href="internal/generation_utils">generate-bezogene Klassen</a>.</li></ol> <h3 class="relative group"><a id="llm-ranglisten" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#llm-ranglisten"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>LLM-Ranglisten</span></h3> <ol data-svelte-h="svelte-aeb0ps"><li><a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard" rel="nofollow">Open LLM Leaderboard</a>, das sich auf die Qualität der Open-Source-Modelle konzentriert;</li> <li><a href="https://huggingface.co/spaces/optimum/llm-perf-leaderboard" rel="nofollow">Open LLM-Perf Leaderboard</a>, das sich auf den LLM-Durchsatz konzentriert.</li></ol> <h3 class="relative group"><a id="latenz-und-durchsatz" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#latenz-und-durchsatz"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Latenz und Durchsatz</span></h3> <ol data-svelte-h="svelte-1txeeui"><li><a href="main_classes/quantization">Leitfaden</a> zur dynamischen Quantisierung, der Ihnen zeigt, wie Sie Ihren Speicherbedarf drastisch reduzieren können.</li></ol> <h3 class="relative group"><a id="verwandte-bibliotheken" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#verwandte-bibliotheken"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Verwandte Bibliotheken</span></h3> <ol data-svelte-h="svelte-1ex27cn"><li><a href="https://github.com/huggingface/text-generation-inference" rel="nofollow">text-generation-inference</a>, ein produktionsreifer Server für LLMs;</li> <li><a href="https://github.com/huggingface/optimum" rel="nofollow"><code>optimum</code></a>, eine Erweiterung von 🤗 Transformers, die für bestimmte Hardware-Geräte optimiert.</li></ol> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/de/llm_tutorial.md" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span data-svelte-h="svelte-x0xyl0">></span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>

	<script>
	{
	__sveltekit_4w7cbd = {
	assets: "/docs/transformers/pr_33913/de",
	base: "/docs/transformers/pr_33913/de",
	env: {}
	};

	const element = document.currentScript.parentElement;

	const data = [null,null];

	Promise.all([
	import("/docs/transformers/pr_33913/de/_app/immutable/entry/start.4cb54951.js"),
	import("/docs/transformers/pr_33913/de/_app/immutable/entry/app.8a9a40b8.js")
	]).then(([kit, app]) => {
	kit.start(app, element, {
	node_ids: [0, 9],
	data,
	form: null,
	error: null
	});
	});
	}
	</script>

Xet Storage Details

Size:: 46.2 kB
Xet hash:: 0bf912ec5c742f82e1fb085be7f733306e4d0dff1b812d23ca8bf52fb9063da4

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.