Buckets:

hf-doc-build
/

doc-dev

hf-doc-build/doc-dev / transformers /pr_33913 /ja /torchscript.html

39.3 kB

	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Export to TorchScript","local":"export-to-torchscript","sections":[{"title":"TorchScript flag and tied weights","local":"torchscript-flag-and-tied-weights","sections":[],"depth":2},{"title":"Dummy inputs and standard lengths","local":"dummy-inputs-and-standard-lengths","sections":[],"depth":2},{"title":"Using TorchScript in Python","local":"using-torchscript-in-python","sections":[{"title":"Saving a model","local":"saving-a-model","sections":[],"depth":3},{"title":"Loading a model","local":"loading-a-model","sections":[],"depth":3},{"title":"Using a traced model for inference","local":"using-a-traced-model-for-inference","sections":[],"depth":3}],"depth":2},{"title":"Deploy Hugging Face TorchScript models to AWS with the Neuron SDK","local":"deploy-hugging-face-torchscript-models-to-aws-with-the-neuron-sdk","sections":[{"title":"Implications","local":"implications","sections":[],"depth":3},{"title":"Dependencies","local":"dependencies","sections":[],"depth":3},{"title":"Converting a model for AWS Neuron","local":"converting-a-model-for-aws-neuron","sections":[],"depth":3}],"depth":2}],"depth":1}">
	<link href="/docs/transformers/pr_33913/ja/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/entry/start.17a8f5f1.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/scheduler.9bc65507.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/singletons.01391f4e.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/index.3b203c72.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/paths.d7050e6d.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/entry/app.ec1e8c3e.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/index.707bf1b6.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/nodes/0.29a283e1.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/nodes/165.dba5add3.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/Tip.c2ecdbf4.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/CodeBlock.54a9f38d.js">
	<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/EditOnGithub.922df6ba.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Export to TorchScript","local":"export-to-torchscript","sections":[{"title":"TorchScript flag and tied weights","local":"torchscript-flag-and-tied-weights","sections":[],"depth":2},{"title":"Dummy inputs and standard lengths","local":"dummy-inputs-and-standard-lengths","sections":[],"depth":2},{"title":"Using TorchScript in Python","local":"using-torchscript-in-python","sections":[{"title":"Saving a model","local":"saving-a-model","sections":[],"depth":3},{"title":"Loading a model","local":"loading-a-model","sections":[],"depth":3},{"title":"Using a traced model for inference","local":"using-a-traced-model-for-inference","sections":[],"depth":3}],"depth":2},{"title":"Deploy Hugging Face TorchScript models to AWS with the Neuron SDK","local":"deploy-hugging-face-torchscript-models-to-aws-with-the-neuron-sdk","sections":[{"title":"Implications","local":"implications","sections":[],"depth":3},{"title":"Dependencies","local":"dependencies","sections":[],"depth":3},{"title":"Converting a model for AWS Neuron","local":"converting-a-model-for-aws-neuron","sections":[],"depth":3}],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="export-to-torchscript" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#export-to-torchscript"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Export to TorchScript</span></h1> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-ptvqxn">これはTorchScriptを使用した実験の最初であり、可変入力サイズのモデルに対するその能力をまだ探求中です。これは私たちの関心の焦点であり、今後のリリースでは、より柔軟な実装や、PythonベースのコードとコンパイルされたTorchScriptを比較するベンチマークを含む、より多くのコード例で詳細な分析を行います。</p></div> <p data-svelte-h="svelte-1y8u98h"><a href="https://pytorch.org/docs/stable/jit.html" rel="nofollow">TorchScriptのドキュメント</a>によれば：</p> <blockquote data-svelte-h="svelte-bbe93u"><p>TorchScriptは、PyTorchコードから直列化および最適化可能なモデルを作成する方法です。</p></blockquote> <p data-svelte-h="svelte-q151zz">TorchScriptを使用すると、効率志向のC++プログラムなど、他のプログラムでモデルを再利用できるようになります。PyTorchベースのPythonプログラム以外の環境で🤗 Transformersモデルをエクスポートして使用するためのインターフェースを提供しています。ここでは、TorchScriptを使用してモデルをエクスポートし、使用する方法を説明します。</p> <p data-svelte-h="svelte-rp9w2r">モデルをエクスポートするには、次の2つの要件があります：</p> <ul data-svelte-h="svelte-llh74c"><li><code>torchscript</code>フラグを使用したモデルのインスタンス化</li> <li>ダミーの入力を使用したフォワードパス</li></ul> <p data-svelte-h="svelte-wcmnvf">これらの必要条件は、以下で詳細に説明されているように、開発者が注意する必要があるいくつかのことを意味します。</p> <h2 class="relative group"><a id="torchscript-flag-and-tied-weights" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#torchscript-flag-and-tied-weights"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>TorchScript flag and tied weights</span></h2> <p data-svelte-h="svelte-1kh9vu9"><code>torchscript</code>フラグは、ほとんどの🤗 Transformers言語モデルにおいて、<code>Embedding</code>レイヤーと<code>Decoding</code>レイヤー間で重みが連結されているため必要です。
	TorchScriptでは、重みが連結されているモデルをエクスポートすることはできませんので、事前に重みを切り離して複製する必要があります。</p> <p data-svelte-h="svelte-ixvxgj"><code>torchscript</code>フラグを使用してインスタンス化されたモデルは、<code>Embedding</code>レイヤーと<code>Decoding</code>レイヤーが分離されており、そのため後でトレーニングしてはいけません。
	トレーニングは、これらの2つのレイヤーを非同期にする可能性があり、予期しない結果をもたらす可能性があります。</p> <p data-svelte-h="svelte-itcrsj">言語モデルヘッドを持たないモデルには言及しませんが、これらのモデルには連結された重みが存在しないため、<code>torchscript</code>フラグなしで安全にエクスポートできます。</p> <h2 class="relative group"><a id="dummy-inputs-and-standard-lengths" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dummy-inputs-and-standard-lengths"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dummy inputs and standard lengths</span></h2> <p data-svelte-h="svelte-44fh6t">ダミー入力はモデルのフォワードパスに使用されます。入力の値はレイヤーを通じて伝播される間、PyTorchは各テンソルに実行された異なる操作を追跡します。これらの記録された操作は、モデルの<em>トレース</em>を作成するために使用されます。</p> <p data-svelte-h="svelte-7rv39">トレースは入力の寸法に対して作成されます。そのため、ダミー入力の寸法に制約され、他のシーケンス長やバッチサイズでは動作しません。異なるサイズで試すと、以下のエラーが発生します：</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->`The expanded <span class="hljs-built_in">size</span> of the tensor (<span class="hljs-number">3</span>) must match the existing <span class="hljs-built_in">size</span> (<span class="hljs-number">7</span>) at non-singleton <span class="hljs-keyword">dimension</span> <span class="hljs-number">2</span>`<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-pderge">お勧めしますのは、モデルの推論中に供給される最大の入力と同じ大きさのダミー入力サイズでモデルをトレースすることです。パディングを使用して不足値を補完することもできます。ただし、モデルがより大きな入力サイズでトレースされるため、行列の寸法も大きくなり、より多くの計算が発生します。</p> <p data-svelte-h="svelte-114ekvc">異なるシーケンス長のモデルをエクスポートする際に、各入力に対して実行される演算の総数に注意して、パフォーマンスを密接にフォローすることをお勧めします。</p> <h2 class="relative group"><a id="using-torchscript-in-python" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-torchscript-in-python"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using TorchScript in Python</span></h2> <p data-svelte-h="svelte-1rrj7ee">このセクションでは、モデルの保存と読み込み、および推論にトレースを使用する方法を示します。</p> <h3 class="relative group"><a id="saving-a-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#saving-a-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Saving a model</span></h3> <p data-svelte-h="svelte-7qqugy">TorchScriptで<code>BertModel</code>をエクスポートするには、<code>BertConfig</code>クラスから<code>BertModel</code>をインスタンス化し、それをファイル名<code>traced_bert.pt</code>でディスクに保存します：</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BertModel, BertTokenizer, BertConfig
	<span class="hljs-keyword">import</span> torch

	enc = BertTokenizer.from_pretrained(<span class="hljs-string">"google-bert/bert-base-uncased"</span>)

	<span class="hljs-comment"># Tokenizing input text</span>
	text = <span class="hljs-string">"[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"</span>
	tokenized_text = enc.tokenize(text)

	<span class="hljs-comment"># Masking one of the input tokens</span>
	masked_index = <span class="hljs-number">8</span>
	tokenized_text[masked_index] = <span class="hljs-string">"[MASK]"</span>
	indexed_tokens = enc.convert_tokens_to_ids(tokenized_text)
	segments_ids = [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]

	<span class="hljs-comment"># Creating a dummy input</span>
	tokens_tensor = torch.tensor([indexed_tokens])
	segments_tensors = torch.tensor([segments_ids])
	dummy_input = [tokens_tensor, segments_tensors]

	<span class="hljs-comment"># Initializing the model with the torchscript flag</span>
	<span class="hljs-comment"># Flag set to True even though it is not necessary as this model does not have an LM Head.</span>
	config = BertConfig(
	vocab_size_or_config_json_file=<span class="hljs-number">32000</span>,
	hidden_size=<span class="hljs-number">768</span>,
	num_hidden_layers=<span class="hljs-number">12</span>,
	num_attention_heads=<span class="hljs-number">12</span>,
	intermediate_size=<span class="hljs-number">3072</span>,
	torchscript=<span class="hljs-literal">True</span>,
	)

	<span class="hljs-comment"># Instantiating the model</span>
	model = BertModel(config)

	<span class="hljs-comment"># The model needs to be in evaluation mode</span>
	model.<span class="hljs-built_in">eval</span>()

	<span class="hljs-comment"># If you are instantiating the model with from_pretrained you can also easily set the TorchScript flag</span>
	model = BertModel.from_pretrained(<span class="hljs-string">"google-bert/bert-base-uncased"</span>, torchscript=<span class="hljs-literal">True</span>)

	<span class="hljs-comment"># Creating the trace</span>
	traced_model = torch.jit.trace(model, [tokens_tensor, segments_tensors])
	torch.jit.save(traced_model, <span class="hljs-string">"traced_bert.pt"</span>)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="loading-a-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#loading-a-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Loading a model</span></h3> <p data-svelte-h="svelte-1ncupfh">以前に保存した <code>BertModel</code>、<code>traced_bert.pt</code> をディスクから読み込んで、以前に初期化した <code>dummy_input</code> で使用できます。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->loaded_model = torch.jit.load(<span class="hljs-string">"traced_bert.pt"</span>)
	loaded_model.<span class="hljs-built_in">eval</span>()

	all_encoder_layers, pooled_output = loaded_model(*dummy_input)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="using-a-traced-model-for-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-a-traced-model-for-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using a traced model for inference</span></h3> <p data-svelte-h="svelte-an02is">トレースモデルを使用して推論を行うには、その <code>__call__</code> ダンダーメソッドを使用します。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->traced_model(tokens_tensor, segments_tensors)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="deploy-hugging-face-torchscript-models-to-aws-with-the-neuron-sdk" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#deploy-hugging-face-torchscript-models-to-aws-with-the-neuron-sdk"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Deploy Hugging Face TorchScript models to AWS with the Neuron SDK</span></h2> <p data-svelte-h="svelte-1tpyqua">AWSはクラウドでの低コストで高性能な機械学習推論向けに <a href="https://aws.amazon.com/ec2/instance-types/inf1/" rel="nofollow">Amazon EC2 Inf1</a> インスタンスファミリーを導入しました。Inf1インスタンスはAWS Inferentiaチップによって駆動され、ディープラーニング推論ワークロードに特化したカスタムビルドのハードウェアアクセラレータです。<a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/#" rel="nofollow">AWS Neuron</a> はInferentia用のSDKで、トランスフォーマーモデルをトレースして最適化し、Inf1に展開するためのサポートを提供します。</p> <p data-svelte-h="svelte-1rx34yk">Neuron SDK が提供するもの:</p> <ol data-svelte-h="svelte-wvdsqe"><li>クラウドでの推論のためにTorchScriptモデルをトレースして最適化するための、1行のコード変更で使用できる簡単なAPI。</li> <li><a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-guide/benchmark/" rel="nofollow">改善されたコストパフォーマンス</a> のためのボックス外のパフォーマンス最適化。</li> <li><a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/src/examples/pytorch/bert_tutorial/tutorial_pretrained_bert.html" rel="nofollow">PyTorch</a> または <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/src/examples/tensorflow/huggingface_bert/huggingface_bert.html" rel="nofollow">TensorFlow</a> で構築されたHugging Faceトランスフォーマーモデルへのサポート。</li></ol> <h3 class="relative group"><a id="implications" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#implications"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Implications</span></h3> <p data-svelte-h="svelte-1tfqdw1">BERT（Bidirectional Encoder Representations from Transformers）アーキテクチャやその変種（<a href="https://huggingface.co/docs/transformers/main/model_doc/distilbert" rel="nofollow">distilBERT</a> や <a href="https://huggingface.co/docs/transformers/main/model_doc/roberta" rel="nofollow">roBERTa</a> など）に基づくトランスフォーマーモデルは、非生成タスク（抽出型質問応答、シーケンス分類、トークン分類など）において、Inf1上で最適に動作します。ただし、テキスト生成タスクも <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/src/examples/pytorch/transformers-marianmt.html" rel="nofollow">AWS Neuron MarianMT チュートリアル</a> に従ってInf1上で実行できます。Inferentiaでボックス外で変換できるモデルに関する詳細情報は、Neuronドキュメンテーションの <a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-guide/models/models-inferentia.html#models-inferentia" rel="nofollow">Model Architecture Fit</a> セクションにあります。</p> <h3 class="relative group"><a id="dependencies" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dependencies"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dependencies</span></h3> <p data-svelte-h="svelte-1tj3zis">モデルをAWS Neuronに変換するには、<a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/neuron-guide/neuron-frameworks/pytorch-neuron/index.html#installation-guide" rel="nofollow">Neuron SDK 環境</a> が必要で、<a href="https://docs.aws.amazon.com/dlami/latest/devguide/tutorial-inferentia-launching.html" rel="nofollow">AWS Deep Learning AMI</a> に事前に構成されています。</p> <h3 class="relative group"><a id="converting-a-model-for-aws-neuron" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#converting-a-model-for-aws-neuron"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Converting a model for AWS Neuron</span></h3> <p data-svelte-h="svelte-4o4276">モデルをAWS NEURON用に変換するには、<a href="torchscript#using-torchscript-in-python">PythonでTorchScriptを使用する</a> と同じコードを使用して <code>BertModel</code> をトレースします。Python APIを介してNeuron SDKのコンポーネントにアクセスするために、<code>torch.neuron</code> フレームワーク拡張をインポートします。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> BertModel, BertTokenizer, BertConfig
	<span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">import</span> torch.neuron<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1bu3neu">次の行を変更するだけで済みます。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-deletion">- torch.jit.trace(model, [tokens_tensor, segments_tensors])</span>
	<span class="hljs-addition">+ torch.neuron.trace(model, [token_tensor, segments_tensors])</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-jz1ko9">これにより、Neuron SDKはモデルをトレースし、Inf1インスタンス向けに最適化します。</p> <p data-svelte-h="svelte-14npq7y">AWS Neuron SDKの機能、ツール、サンプルチュートリアル、最新のアップデートについて詳しく知りたい場合は、<a href="https://awsdocs-neuron.readthedocs-hosted.com/en/latest/index.html" rel="nofollow">AWS NeuronSDK ドキュメンテーション</a> をご覧ください。</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/ja/torchscript.md" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span data-svelte-h="svelte-x0xyl0">></span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>

	<script>
	{
	__sveltekit_17iw4ji = {
	assets: "/docs/transformers/pr_33913/ja",
	base: "/docs/transformers/pr_33913/ja",
	env: {}
	};

	const element = document.currentScript.parentElement;

	const data = [null,null];

	Promise.all([
	import("/docs/transformers/pr_33913/ja/_app/immutable/entry/start.17a8f5f1.js"),
	import("/docs/transformers/pr_33913/ja/_app/immutable/entry/app.ec1e8c3e.js")
	]).then(([kit, app]) => {
	kit.start(app, element, {
	node_ids: [0, 165],
	data,
	form: null,
	error: null
	});
	});
	}
	</script>

Xet Storage Details

Size:: 39.3 kB
Xet hash:: 751b1e4e571b233bbd188e7118b3485c1a550ce0bed6d71b837c0ecc00515170

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.