Buckets:

hf-doc-build
/

doc-dev

hf-doc-build/doc-dev / transformers /pr_33913 /ja /llm_tutorial.html

47.9 kB

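	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Generation with LLMs&quot;,&quot;local&quot;:&quot;generation-with-llms&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Generate text&quot;,&quot;local&quot;:&quot;generate-text&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Common pitfalls&quot;,&quot;local&quot;:&quot;common-pitfalls&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Generated output is too short/long&quot;,&quot;local&quot;:&quot;generated-output-is-too-shortlong&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Incorrect generation mode&quot;,&quot;local&quot;:&quot;incorrect-generation-mode&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Wrong padding side&quot;,&quot;local&quot;:&quot;wrong-padding-side&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Further resources&quot;,&quot;local&quot;:&quot;further-resources&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Advanced generate usage&quot;,&quot;local&quot;:&quot;advanced-generate-usage&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;LLM leaderboards&quot;,&quot;local&quot;:&quot;llm-leaderboards&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Latency and throughput&quot;,&quot;local&quot;:&quot;latency-and-throughput&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Related libraries&quot;,&quot;local&quot;:&quot;related-libraries&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">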
	<link href="/docs/transformers/pr_33913/ja/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/entry/start.17a8f5f1.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/scheduler.9bc65507.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/singletons.01391f4e.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/index.3b203c72.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/paths.d7050e6d.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/entry/app.ec1e8c3e.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/index.707bf1b6.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/nodes/0.29a283e1.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/nodes/29.9749c8ce.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/Tip.c2ecdbf4.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/CodeBlock.54a9f38d.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/DocNotebookDropdown.41f65cb5.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/globals.7f7f1b26.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/EditOnGithub.922df6ba.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Generation with LLMs","local":"generation-with-llms","sections":[{"title":"Generate text","local":"generate-text","sections":[],"depth":2},{"title":"Common pitfalls","local":"common-pitfalls","sections":[{"title":"Generated output is too short/long","local":"generated-output-is-too-shortlong","sections":[],"depth":3},{"title":"Incorrect generation mode","local":"incorrect-generation-mode","sections":[],"depth":3},{"title":"Wrong padding side","local":"wrong-padding-side","sections":[],"depth":3}],"depth":2},{"title":"Further resources","local":"further-resources","sections":[{"title":"Advanced generate usage","local":"advanced-generate-usage","sections":[],"depth":3},{"title":"LLM leaderboards","local":"llm-leaderboards","sections":[],"depth":3},{"title":"Latency and throughput","local":"latency-and-throughput","sections":[],"depth":3},{"title":"Related libraries","local":"related-libraries","sections":[],"depth":3}],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="generation-with-llms" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#generation-with-llms"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 
0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Generation with LLMs</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <p data-svelte-h="svelte-1p8oyus">LLM、またはLarge Language Models（大規模言語モデル）は、テキスト生成の鍵となる要素です。要するに、これらは大規模な事前訓練済みトランスフォーマーモデルで、与えられた入力テキストに基づいて次の単語（または、より正確にはトークン）を予測するように訓練されています。トークンを1つずつ予測するため、モデルを呼び出すだけでは新しい文を生成するために何かより精巧なことをする必要があります。自己回帰生成を行う必要があります。</p> <p data-svelte-h="svelte-fzitdn">自己回帰生成は、推論時の手続きで、いくつかの初期入力を与えた状態で、モデルを反復的に呼び出す手法です。🤗 Transformersでは、これは<a href="/docs/transformers/pr_33913/ja/main_classes/text_generation#transformers.GenerationMixin.generate">generate()</a>メソッドによって処理され、これは生成能力を持つすべてのモデルで利用可能です。</p> <p data-svelte-h="svelte-10d5jzs">このチュートリアルでは、以下のことを示します：</p> <ul data-svelte-h="svelte-114gruu"><li>LLMを使用してテキストを生成する方法</li> <li>一般的な落とし穴を回避する方法</li> <li>LLMを最大限に活用するための次のステップ</li></ul> <p data-svelte-h="svelte-5jpx9c">始める前に、必要なライブラリがすべてインストールされていることを確認してください：</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install transformers bitsandbytes>=0.39.0 -q<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="generate-text" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#generate-text"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Generate text</span></h2> <p data-svelte-h="svelte-1kvqgos"><a href="tasks/language_modeling">因果言語モデリング</a>のためにトレーニングされた言語モデルは、テキストトークンのシーケンスを入力として受け取り、次のトークンの確率分布を返します。</p> <figure 
class="image table text-center m-0 w-full" data-svelte-h="svelte-hjgddv"><video style="max-width: 90%; margin: auto;" autoplay="" loop="" muted="" playsinline="" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/assisted-generation/gif_1_1080p.mov"></video> <figcaption>"Forward pass of an LLM"</figcaption></figure> <p data-svelte-h="svelte-1vvn9v3">LLM（Language Model）による自己回帰生成の重要な側面の1つは、この確率分布から次のトークンを選択する方法です。このステップでは、次のイテレーションのためのトークンが得られる限り、何でも可能です。これは、確率分布から最も可能性の高いトークンを選択するだけのシンプルな方法から、結果の分布からサンプリングする前に数々の変換を適用するほど複雑な方法まで、あらゆる方法が考えられます。</p> <figure class="image table text-center m-0 w-full" data-svelte-h="svelte-1uqc9hk"><video style="max-width: 90%; margin: auto;" autoplay="" loop="" muted="" playsinline="" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/assisted-generation/gif_2_1080p.mov"></video> <figcaption>"Autoregressive generation iteratively selects the next token from a probability distribution to generate text"</figcaption></figure> <p data-svelte-h="svelte-1q7bq18">上記のプロセスは、ある停止条件が満たされるまで反復的に繰り返されます。理想的には、停止条件はモデルによって指示され、モデルは終了シーケンス（<code>EOS</code>）トークンを出力するタイミングを学習すべきです。これがそうでない場合、生成はあらかじめ定義された最大長に達したときに停止します。</p> <p data-svelte-h="svelte-7k2y8p">トークン選択ステップと停止条件を適切に設定することは、モデルがタスクで期待どおりに振る舞うために重要です。それが、各モデルに関連付けられた <a href="/docs/transformers/pr_33913/ja/main_classes/text_generation#transformers.GenerationConfig">GenerationConfig</a> ファイルがある理由であり、これには優れたデフォルトの生成パラメータ化が含まれ、モデルと一緒に読み込まれます。</p> <p data-svelte-h="svelte-1gzvd9k">コードについて話しましょう！</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-12nwfyd">基本的なLLMの使用に興味がある場合、高レベルの <a href="pipeline_tutorial"><code>Pipeline</code></a> インターフェースが良い出発点です。ただし、LLMはしばしば量子化やトークン選択ステップの細かい制御などの高度な機能が必要であり、これは <a 
href="/docs/transformers/pr_33913/ja/main_classes/text_generation#transformers.GenerationMixin.generate">generate()</a> を介して最良に行われます。LLMとの自己回帰生成はリソースが多く必要であり、適切なスループットのためにGPUで実行する必要があります。</p></div> <p data-svelte-h="svelte-1mik2gq">まず、モデルを読み込む必要があります。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM

	<span class="hljs-meta">>>> </span>model = AutoModelForCausalLM.from_pretrained(
	<span class="hljs-meta">... </span> <span class="hljs-string">"openlm-research/open_llama_7b"</span>, device_map=<span class="hljs-string">"auto"</span>, load_in_4bit=<span class="hljs-literal">True</span>
	<span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-dj98s8"><code>from_pretrained</code> 呼び出しで2つのフラグがあることに注意してください：</p> <ul data-svelte-h="svelte-k3ww82"><li><code>device_map</code> はモデルをあなたのGPUに移動させます</li> <li><code>load_in_4bit</code> は<a href="main_classes/quantization">4ビットの動的量子化</a>を適用してリソース要件を大幅に削減します</li></ul> <p data-svelte-h="svelte-1ved9f1">モデルを初期化する他の方法もありますが、これはLLMを始めるための良い基準です。</p> <p data-svelte-h="svelte-1vkr8y3">次に、<a href="tokenizer_summary">トークナイザ</a>を使用してテキスト入力を前処理する必要があります。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer

	<span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"openlm-research/open_llama_7b"</span>)
	<span class="hljs-meta">>>> </span>model_inputs = tokenizer([<span class="hljs-string">"A list of colors: red, blue"</span>], return_tensors=<span class="hljs-string">"pt"</span>).to(<span class="hljs-string">"cuda"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-sm6di1"><code>model_inputs</code> 変数は、トークン化されたテキスト入力とアテンションマスクを保持しています。 <a href="/docs/transformers/pr_33913/ja/main_classes/text_generation#transformers.GenerationMixin.generate">generate()</a> は、アテンションマスクが渡されていない場合でも、最善の努力をしてそれを推測しようとしますが、できる限り渡すことをお勧めします。最適な結果を得るためです。</p> <p data-svelte-h="svelte-uamvnv">最後に、<a href="/docs/transformers/pr_33913/ja/main_classes/text_generation#transformers.GenerationMixin.generate">generate()</a> メソッドを呼び出して生成されたトークンを取得し、それを表示する前にテキストに変換する必要があります。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span 
class="hljs-meta">>>> </span>generated_ids = model.generate(**model_inputs)
	<span class="hljs-meta">>>> </span>tokenizer.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-string">'A list of colors: red, blue, green, yellow, black, white, and brown'</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1fgh7wz">これで完了です！わずかなコード行数で、LLM（Large Language Model）のパワーを活用できます。</p> <h2 class="relative group"><a id="common-pitfalls" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#common-pitfalls"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Common pitfalls</span></h2> <p data-svelte-h="svelte-k0wny5"><a href="generation_strategies">生成戦略</a>はたくさんあり、デフォルトの値があなたのユースケースに適していないことがあります。出力が期待通りでない場合、最も一般的な落とし穴とその回避方法のリストを作成しました。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer

	<span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"openlm-research/open_llama_7b"</span>)
	<span class="hljs-meta">>>> </span>tokenizer.pad_token = tokenizer.eos_token <span class="hljs-comment"># Llama has no pad token by default</span>
	<span class="hljs-meta">>>> </span>model = AutoModelForCausalLM.from_pretrained(
	<span class="hljs-meta">... </span> <span class="hljs-string">"openlm-research/open_llama_7b"</span>, device_map=<span class="hljs-string">"auto"</span>, load_in_4bit=<span class="hljs-literal">True</span>
	<span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="generated-output-is-too-shortlong" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#generated-output-is-too-shortlong"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Generated output is too short/long</span></h3> <p data-svelte-h="svelte-ag32g9"><a href="/docs/transformers/pr_33913/ja/main_classes/text_generation#transformers.GenerationConfig">GenerationConfig</a> ファイルで指定されていない場合、<code>generate</code> はデフォルトで最大で 20 トークンまで返します。我々は <code>generate</code> コールで <code>max_new_tokens</code> を手動で設定することを強くお勧めします。これにより、返される新しいトークンの最大数を制御できます。LLM（正確には、<a href="https://huggingface.co/learn/nlp-course/chapter1/6?fw=pt" rel="nofollow">デコーダー専用モデル</a>）も出力の一部として入力プロンプトを返すことに注意してください。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" 
fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>model_inputs = tokenizer([<span class="hljs-string">"A sequence of numbers: 1, 2"</span>], return_tensors=<span class="hljs-string">"pt"</span>).to(<span class="hljs-string">"cuda"</span>)

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># By default, the output will contain up to 20 tokens</span>
	<span class="hljs-meta">>>> </span>generated_ids = model.generate(**model_inputs)
	<span class="hljs-meta">>>> </span>tokenizer.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-string">'A sequence of numbers: 1, 2, 3, 4, 5'</span>

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># Setting `max_new_tokens` allows you to control the maximum length</span>
	<span class="hljs-meta">>>> </span>generated_ids = model.generate(**model_inputs, max_new_tokens=<span class="hljs-number">50</span>)
	<span class="hljs-meta">>>> </span>tokenizer.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-string">'A sequence of numbers: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,'</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="incorrect-generation-mode" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#incorrect-generation-mode"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Incorrect generation mode</span></h3> <p data-svelte-h="svelte-10sfz0k">デフォルトでは、 <a href="/docs/transformers/pr_33913/ja/main_classes/text_generation#transformers.GenerationConfig">GenerationConfig</a> ファイルで指定されていない限り、<code>generate</code> は各イテレーションで最も可能性の高いトークンを選択します（貪欲デコーディング）。タスクに応じて、これは望ましくないことがあります。チャットボットやエッセイのような創造的なタスクでは、サンプリングが有益です。一方、音声の転写や翻訳のような入力に基づくタスクでは、貪欲デコーディングが有益です。<code>do_sample=True</code> でサンプリングを有効にできます。このトピックについての詳細は、この<a href="https://huggingface.co/blog/how-to-generate" rel="nofollow">ブログポスト</a>で学ぶことができます。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code 
excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-comment"># Set seed for reproducibility -- you don't need this unless you want full reproducibility</span>
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> set_seed
	<span class="hljs-meta">>>> </span>set_seed(<span class="hljs-number">0</span>)

	<span class="hljs-meta">>>> </span>model_inputs = tokenizer([<span class="hljs-string">"I am a cat."</span>], return_tensors=<span class="hljs-string">"pt"</span>).to(<span class="hljs-string">"cuda"</span>)

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># LLM + greedy decoding = repetitive, boring output</span>
	<span class="hljs-meta">>>> </span>generated_ids = model.generate(**model_inputs)
	<span class="hljs-meta">>>> </span>tokenizer.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-string">'I am a cat. I am a cat. I am a cat. I am a cat'</span>

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># With sampling, the output becomes more creative!</span>
	<span class="hljs-meta">>>> </span>generated_ids = model.generate(**model_inputs, do_sample=<span class="hljs-literal">True</span>)
	<span class="hljs-meta">>>> </span>tokenizer.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-string">'I am a cat.\nI just need to be. I am always.\nEvery time'</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="wrong-padding-side" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wrong-padding-side"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Wrong padding side</span></h3> <p data-svelte-h="svelte-fd0pc8">LLM（Large Language Models）は<a href="https://huggingface.co/learn/nlp-course/chapter1/6?fw=pt" rel="nofollow">デコーダー専用</a>のアーキテクチャであり、入力プロンプトを繰り返し処理することを意味します。入力が同じ長さでない場合、それらをパディングする必要があります。LLMはパッドトークンからの続きを学習していないため、入力は左パディングする必要があります。また、生成に対して注目マスクを渡し忘れないようにしてください！</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-comment"># The tokenizer initialized above has right-padding active by default: the 1st sequence,</span>
	<span class="hljs-meta">>>> </span><span class="hljs-comment"># which is shorter, has padding on the right side. Generation fails.</span>
	<span class="hljs-meta">>>> </span>model_inputs = tokenizer(
	<span class="hljs-meta">... </span> [<span class="hljs-string">"1, 2, 3"</span>, <span class="hljs-string">"A, B, C, D, E"</span>], padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>
	<span class="hljs-meta">... </span>).to(<span class="hljs-string">"cuda"</span>)
	<span class="hljs-meta">>>> </span>generated_ids = model.generate(**model_inputs)
	<span class="hljs-meta">>>> </span>tokenizer.batch_decode(generated_ids[<span class="hljs-number">0</span>], skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-string">''</span>

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># With left-padding, it works as expected!</span>
	<span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"openlm-research/open_llama_7b"</span>, padding_side=<span class="hljs-string">"left"</span>)
	<span class="hljs-meta">>>> </span>tokenizer.pad_token = tokenizer.eos_token <span class="hljs-comment"># Llama has no pad token by default</span>
	<span class="hljs-meta">>>> </span>model_inputs = tokenizer(
	<span class="hljs-meta">... </span> [<span class="hljs-string">"1, 2, 3"</span>, <span class="hljs-string">"A, B, C, D, E"</span>], padding=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>
	<span class="hljs-meta">... </span>).to(<span class="hljs-string">"cuda"</span>)
	<span class="hljs-meta">>>> </span>generated_ids = model.generate(**model_inputs)
	<span class="hljs-meta">>>> </span>tokenizer.batch_decode(generated_ids, skip_special_tokens=<span class="hljs-literal">True</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-string">'1, 2, 3, 4, 5, 6,'</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="further-resources" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#further-resources"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Further resources</span></h2> <p data-svelte-h="svelte-1d601do">オートリグレッシブ生成プロセスは比較的簡単ですが、LLMを最大限に活用することは多くの要素が絡むため、挑戦的な試みとなります。LLMの使用と理解をさらに深めるための次のステップについては以下のリソースをご覧ください。</p> <h3 class="relative group"><a id="advanced-generate-usage" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-generate-usage"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 
28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced generate usage</span></h3> <ol data-svelte-h="svelte-ogfvi4"><li><a href="generation_strategies">ガイド</a>：異なる生成方法を制御する方法、生成構成ファイルの設定方法、出力のストリーミング方法についてのガイド;</li> <li><a href="/docs/transformers/pr_33913/ja/main_classes/text_generation#transformers.GenerationConfig">GenerationConfig</a>、<a href="/docs/transformers/pr_33913/ja/main_classes/text_generation#transformers.GenerationMixin.generate">generate()</a>、および<a href="internal/generation_utils">生成関連クラス</a>に関するAPIリファレンス。</li></ol> <h3 class="relative group"><a id="llm-leaderboards" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#llm-leaderboards"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>LLM leaderboards</span></h3> <ol data-svelte-h="svelte-1mag1j5"><li><a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard" 
rel="nofollow">Open LLM リーダーボード</a>：オープンソースモデルの品質に焦点を当てたリーダーボード;</li> <li><a href="https://huggingface.co/spaces/optimum/llm-perf-leaderboard" rel="nofollow">Open LLM-Perf リーダーボード</a>：LLMのスループットに焦点を当てたリーダーボード。</li></ol> <h3 class="relative group"><a id="latency-and-throughput" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#latency-and-throughput"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Latency and throughput</span></h3> <ol data-svelte-h="svelte-1d7xcac"><li><a href="main_classes/quantization">ガイド</a>：動的量子化に関するガイド。メモリ要件を大幅に削減する方法を説明しています。</li></ol> <h3 class="relative group"><a id="related-libraries" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#related-libraries"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 
1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Related libraries</span></h3> <ol data-svelte-h="svelte-1v1y2ns"><li><a href="https://github.com/huggingface/text-generation-inference" rel="nofollow"><code>text-generation-inference</code></a>：LLM用の本番向けサーバー;</li> <li><a href="https://github.com/huggingface/optimum" rel="nofollow"><code>optimum</code></a>：特定のハードウェアデバイス向けに最適化された🤗 Transformersの拡張。</li></ol> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/ja/llm_tutorial.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>

	<script>
	{
		// Client bootstrap for this page. The braces give the `const` bindings below
		// their own block scope so they do not leak into the global scope.

		// Deliberately assigned without a declaration: in this classic (non-module)
		// script that creates a global, which is how the value becomes reachable from
		// outside this block. Presumably the imported entry modules read it -- TODO confirm.
		__sveltekit_17iw4ji = {
			assets: "/docs/transformers/pr_33913/ja",
			base: "/docs/transformers/pr_33913/ja",
			env: {}
		};

		// `document.currentScript` must be read synchronously while this script is
		// executing; the parent of the <script> tag is what gets handed to `kit.start`.
		const mountPoint = document.currentScript.parentElement;

		// One slot per entry in `node_ids` below; both are empty for this page.
		const nodeData = [null,null];

		// Kick off both dynamic imports right away so they load in parallel.
		const startModule = import("/docs/transformers/pr_33913/ja/_app/immutable/entry/start.17a8f5f1.js");
		const appModule = import("/docs/transformers/pr_33913/ja/_app/immutable/entry/app.ec1e8c3e.js");

		Promise.all([startModule, appModule]).then((loaded) => {
			const kit = loaded[0];
			const app = loaded[1];
			const startOptions = {
				node_ids: [0, 29],
				data: nodeData,
				form: null,
				error: null
			};
			kit.start(app, mountPoint, startOptions);
		});
	}
	</script>

Xet Storage Details

Size: 47.9 kB
Xet hash: a1440e8d3ce37e8c314acbc45136678594da096e57147ad1b00173addcc60d8b

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.