Buckets:

download
raw
88.1 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;What 🤗 Transformers can do&quot;,&quot;local&quot;:&quot;what--transformers-can-do&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Audio&quot;,&quot;local&quot;:&quot;audio&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Audio classification&quot;,&quot;local&quot;:&quot;audio-classification&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Automatic speech recognition&quot;,&quot;local&quot;:&quot;automatic-speech-recognition&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Computer vision&quot;,&quot;local&quot;:&quot;computer-vision&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Image classification&quot;,&quot;local&quot;:&quot;image-classification&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Object detection&quot;,&quot;local&quot;:&quot;object-detection&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Image segmentation&quot;,&quot;local&quot;:&quot;image-segmentation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Depth estimation&quot;,&quot;local&quot;:&quot;depth-estimation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Natural language processing&quot;,&quot;local&quot;:&quot;natural-language-processing&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Text classification&quot;,&quot;local&quot;:&quot;text-classification&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Token classification&quot;,&quot;local&quot;:&quot;token-classification&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Question answering&quot;,&quot;local&quot;:&quot;question-answering&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Summarization&quot;,&quot;local&quot;:&quot;summarization&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Translation&quot;,&quot;local&quot;:&quot;translation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;言語モデリング&quot;,&quot;local&quot;:&quot;言語モデリング&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Multimodal&quot;,&quot;local&quot;:&quot;multimodal&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Document question answering&quot;,&quot;local&quot;:&quot;document-question-answering&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers/pr_33913/ja/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/entry/start.17a8f5f1.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/scheduler.9bc65507.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/singletons.01391f4e.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/index.3b203c72.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/paths.d7050e6d.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/entry/app.ec1e8c3e.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/index.707bf1b6.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/nodes/0.29a283e1.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/nodes/134.0b448a51.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/CodeBlock.54a9f38d.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ja/_app/immutable/chunks/EditOnGithub.922df6ba.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;What 🤗 Transformers can do&quot;,&quot;local&quot;:&quot;what--transformers-can-do&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Audio&quot;,&quot;local&quot;:&quot;audio&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Audio classification&quot;,&quot;local&quot;:&quot;audio-classification&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Automatic speech recognition&quot;,&quot;local&quot;:&quot;automatic-speech-recognition&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Computer vision&quot;,&quot;local&quot;:&quot;computer-vision&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Image classification&quot;,&quot;local&quot;:&quot;image-classification&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Object detection&quot;,&quot;local&quot;:&quot;object-detection&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Image segmentation&quot;,&quot;local&quot;:&quot;image-segmentation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Depth estimation&quot;,&quot;local&quot;:&quot;depth-estimation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Natural language processing&quot;,&quot;local&quot;:&quot;natural-language-processing&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Text classification&quot;,&quot;local&quot;:&quot;text-classification&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Token classification&quot;,&quot;local&quot;:&quot;token-classification&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Question answering&quot;,&quot;local&quot;:&quot;question-answering&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Summarization&quot;,&quot;local&quot;:&quot;summarization&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Translation&quot;,&quot;local&quot;:&quot;translation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;言語モデリング&quot;,&quot;local&quot;:&quot;言語モデリング&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Multimodal&quot;,&quot;local&quot;:&quot;multimodal&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Document question answering&quot;,&quot;local&quot;:&quot;document-question-answering&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="what--transformers-can-do" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what--transformers-can-do"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What 🤗 Transformers can do</span></h1> <p data-svelte-h="svelte-ek4mfb">🤗 Transformersは、自然言語処理(NLP)、コンピュータビジョン、音声処理などの最新の事前学習済みモデルのライブラリです。このライブラリには、Transformerモデルだけでなく、コンピュータビジョンタスク向けのモダンな畳み込みニューラルネットワークなど、Transformer以外のモデルも含まれています。現代のスマートフォン、アプリ、テレビなど、最も人気のある消費者製品の多くには、深層学習技術が使用されています。スマートフォンで撮影した写真から背景オブジェクトを削除したいですか?これはパノプティック・セグメンテーション(まだ何を意味するかわからなくても心配しないでください、以下のセクションで説明します!)のタスクの一例です。</p> <p data-svelte-h="svelte-1rw5e2u">このページでは、🤗 Transformersライブラリを使用して、たった3行のコードで解決できるさまざまな音声および音声、コンピュータビジョン、NLPタスクの概要を提供します!</p> <h2 class="relative group"><a id="audio" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#audio"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Audio</span></h2> <p data-svelte-h="svelte-15dyqe4">音声と音声処理のタスクは、他のモダリティとは少し異なります。なぜなら、入力としての生の音声波形は連続的な信号であるからです。テキストとは異なり、生の音声波形は文章を単語に分割できるようにきれいに分割できません。これを解決するために、通常、生の音声信号は定期的な間隔でサンプリングされます。間隔内でより多くのサンプルを取ると、サンプリングレートが高くなり、音声はより元の音声ソースに近づきます。</p> <p data-svelte-h="svelte-lal6en">以前のアプローチでは、音声を前処理してそれから有用な特徴を抽出しました。しかし、現在では、生の音声波形を特徴エンコーダに直接フィードし、音声表現を抽出することから始めることが一般的です。これにより、前処理ステップが簡素化され、モデルは最も重要な特徴を学習できます。</p> <h3 class="relative group"><a id="audio-classification" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#audio-classification"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Audio classification</span></h3> <p data-svelte-h="svelte-1k2664o">音声分類は、事前に定義されたクラスのセットから音声データにラベルを付けるタスクです。これは多くの具体的なアプリケーションを含む広範なカテゴリであり、いくつかの例は次のとおりです:</p> <ul data-svelte-h="svelte-1qvj2e6"><li>音響シーン分類:音声にシーンラベルを付ける(「オフィス」、「ビーチ」、「スタジアム」)</li> <li>音響イベント検出:音声に音声イベントラベルを付ける(「車のクラクション」、「クジラの呼び声」、「ガラスの破裂」)</li> <li>タギング:複数の音を含む音声にラベルを付ける(鳥の鳴き声、会議でのスピーカー識別)</li> <li>音楽分類:音楽にジャンルラベルを付ける(「メタル」、「ヒップホップ」、「カントリー」)</li></ul> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>classifier = pipeline(task=<span class="hljs-string">&quot;audio-classification&quot;</span>, model=<span class="hljs-string">&quot;superb/hubert-base-superb-er&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = classifier(<span class="hljs-string">&quot;https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = [{<span class="hljs-string">&quot;score&quot;</span>: <span class="hljs-built_in">round</span>(pred[<span class="hljs-string">&quot;score&quot;</span>], <span class="hljs-number">4</span>), <span class="hljs-string">&quot;label&quot;</span>: pred[<span class="hljs-string">&quot;label&quot;</span>]} <span class="hljs-keyword">for</span> pred <span class="hljs-keyword">in</span> preds]
<span class="hljs-meta">&gt;&gt;&gt; </span>preds
[{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.4532</span>, <span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-string">&#x27;hap&#x27;</span>},
{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.3622</span>, <span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-string">&#x27;sad&#x27;</span>},
{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.0943</span>, <span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-string">&#x27;neu&#x27;</span>},
{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.0903</span>, <span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-string">&#x27;ang&#x27;</span>}]<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="automatic-speech-recognition" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#automatic-speech-recognition"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Automatic speech recognition</span></h3> <p data-svelte-h="svelte-1m26psw">自動音声認識(ASR)は音声をテキストに変換します。これは、人間のコミュニケーションの自然な形式である音声の一部として、部分的にそのような一般的なオーディオタスクの一つです。今日、ASRシステムはスピーカー、スマートフォン、自動車などの「スマート」テクノロジープロダクトに組み込まれています。私たちは仮想アシスタントに音楽を再生してもらったり、リマインダーを設定してもらったり、天気を教えてもらったりできます。</p> <p data-svelte-h="svelte-r87y6j">しかし、Transformerアーキテクチャが助けた主要な課題の一つは、低リソース言語におけるものです。大量の音声データで事前トレーニングし、低リソース言語でラベル付き音声データをわずか1時間だけでファインチューニングすることでも、以前のASRシステムと比較して高品質な結果を得ることができます。以前のASRシステムは100倍以上のラベル付きデータでトレーニングされていましたが、Transformerアーキテクチャはこの問題に貢献しました。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>transcriber = pipeline(task=<span class="hljs-string">&quot;automatic-speech-recognition&quot;</span>, model=<span class="hljs-string">&quot;openai/whisper-small&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>transcriber(<span class="hljs-string">&quot;https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac&quot;</span>)
{<span class="hljs-string">&#x27;text&#x27;</span>: <span class="hljs-string">&#x27; I have a dream that one day this nation will rise up and live out the true meaning of its creed.&#x27;</span>}<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="computer-vision" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#computer-vision"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Computer vision</span></h2> <p data-svelte-h="svelte-18036m9">最初で初めて成功したコンピュータビジョンのタスクの一つは、<a href="glossary#convolution">畳み込みニューラルネットワーク(CNN)</a>を使用して郵便番号の画像を認識することでした。画像はピクセルから構成され、各ピクセルには数値があります。これにより、画像をピクセル値の行列として簡単に表現できます。特定のピクセル値の組み合わせは、画像の色を記述します。</p> <p data-svelte-h="svelte-ymmeyz">コンピュータビジョンのタスクを解決する一般的な方法は次の2つです:</p> <ol data-svelte-h="svelte-v4gh81"><li>畳み込みを使用して、低レベルの特徴から高レベルの抽象的な情報まで、画像の階層的な特徴を学習する。</li> <li>画像をパッチに分割し、各画像パッチが画像全体とどのように関連しているかを徐々に学習するためにTransformerを使用する。CNNが好むボトムアップアプローチとは異なり、これはぼんやりとした画像から始めて徐々に焦点を合わせるようなものです。</li></ol> <h3 class="relative group"><a id="image-classification" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#image-classification"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Image classification</span></h3> <p data-svelte-h="svelte-1mp36i8">画像分類は、事前に定義されたクラスのセットから画像全体にラベルを付けます。多くの分類タスクと同様に、画像分類には多くの実用的な用途があり、その一部は次のとおりです:</p> <ul data-svelte-h="svelte-j3igwn"><li>医療:疾患の検出や患者の健康の監視に使用するために医療画像にラベルを付ける</li> <li>環境:森林伐採の監視、野生地の管理情報、または山火事の検出に使用するために衛星画像にラベルを付ける</li> <li>農業:作物の健康を監視するための作物の画像や、土地利用の監視のための衛星画像にラベルを付ける</li> <li>生態学:野生動物の個体数を監視したり、絶滅危惧種を追跡するために動植物の種の画像にラベルを付ける</li></ul> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>classifier = pipeline(task=<span class="hljs-string">&quot;image-classification&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = classifier(
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg&quot;</span>
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = [{<span class="hljs-string">&quot;score&quot;</span>: <span class="hljs-built_in">round</span>(pred[<span class="hljs-string">&quot;score&quot;</span>], <span class="hljs-number">4</span>), <span class="hljs-string">&quot;label&quot;</span>: pred[<span class="hljs-string">&quot;label&quot;</span>]} <span class="hljs-keyword">for</span> pred <span class="hljs-keyword">in</span> preds]
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(*preds, sep=<span class="hljs-string">&quot;\n&quot;</span>)
{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.4335</span>, <span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-string">&#x27;lynx, catamount&#x27;</span>}
{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.0348</span>, <span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-string">&#x27;cougar, puma, catamount, mountain lion, painter, panther, Felis concolor&#x27;</span>}
{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.0324</span>, <span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-string">&#x27;snow leopard, ounce, Panthera uncia&#x27;</span>}
{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.0239</span>, <span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-string">&#x27;Egyptian cat&#x27;</span>}
{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.0229</span>, <span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-string">&#x27;tiger cat&#x27;</span>}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="object-detection" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#object-detection"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Object detection</span></h3> <p data-svelte-h="svelte-1xlgwtm">画像分類とは異なり、オブジェクト検出は画像内の複数のオブジェクトを識別し、オブジェクトの位置を画像内で定義する境界ボックスによって特定します。オブジェクト検出の例には次のような用途があります:</p> <ul data-svelte-h="svelte-vyf034"><li>自動運転車:他の車両、歩行者、信号機などの日常の交通オブジェクトを検出</li> <li>リモートセンシング:災害モニタリング、都市計画、天候予測</li> <li>欠陥検出:建物のクラックや構造上の損傷、製造上の欠陥を検出</li></ul> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>detector = pipeline(task=<span class="hljs-string">&quot;object-detection&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = detector(
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg&quot;</span>
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = [{<span class="hljs-string">&quot;score&quot;</span>: <span class="hljs-built_in">round</span>(pred[<span class="hljs-string">&quot;score&quot;</span>], <span class="hljs-number">4</span>), <span class="hljs-string">&quot;label&quot;</span>: pred[<span class="hljs-string">&quot;label&quot;</span>], <span class="hljs-string">&quot;box&quot;</span>: pred[<span class="hljs-string">&quot;box&quot;</span>]} <span class="hljs-keyword">for</span> pred <span class="hljs-keyword">in</span> preds]
<span class="hljs-meta">&gt;&gt;&gt; </span>preds
[{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9865</span>,
<span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-string">&#x27;cat&#x27;</span>,
<span class="hljs-string">&#x27;box&#x27;</span>: {<span class="hljs-string">&#x27;xmin&#x27;</span>: <span class="hljs-number">178</span>, <span class="hljs-string">&#x27;ymin&#x27;</span>: <span class="hljs-number">154</span>, <span class="hljs-string">&#x27;xmax&#x27;</span>: <span class="hljs-number">882</span>, <span class="hljs-string">&#x27;ymax&#x27;</span>: <span class="hljs-number">598</span>}}]<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="image-segmentation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#image-segmentation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Image segmentation</span></h3> <p data-svelte-h="svelte-ryfffz">画像セグメンテーションは、画像内のすべてのピクセルをクラスに割り当てるピクセルレベルのタスクです。これはオブジェクト検出とは異なり、オブジェクトをラベル付けし、予測するために境界ボックスを使用する代わりに、セグメンテーションはより詳細になります。セグメンテーションはピクセルレベルでオブジェクトを検出できます。画像セグメンテーションにはいくつかのタイプがあります:</p> <ul data-svelte-h="svelte-1rz6dd1"><li>インスタンスセグメンテーション:オブジェクトのクラスをラベル付けするだけでなく、オブジェクトの個別のインスタンス(“犬-1”、“犬-2”)もラベル付けします。</li> <li>パノプティックセグメンテーション:セマンティックセグメンテーションとインスタンスセグメンテーションの組み合わせ。セマンティッククラスごとに各ピクセルにラベルを付け、オブジェクトの個別のインスタンスもラベル付けします。</li></ul> <p data-svelte-h="svelte-rbr5j5">セグメンテーションタスクは、自動運転車にとって、周囲の世界のピクセルレベルのマップを作成し、歩行者や他の車両を安全に回避できるようにするのに役立ちます。また、医療画像では、タスクの細かい粒度が異常な細胞や臓器の特徴を識別するのに役立ちます。画像セグメンテーションは、eコマースで衣類を仮想的に試着したり、カメラを通じて実世界にオブジェクトを重ねて拡張現実の体験を作成したりするためにも使用できます。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>segmenter = pipeline(task=<span class="hljs-string">&quot;image-segmentation&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = segmenter(
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg&quot;</span>
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = [{<span class="hljs-string">&quot;score&quot;</span>: <span class="hljs-built_in">round</span>(pred[<span class="hljs-string">&quot;score&quot;</span>], <span class="hljs-number">4</span>), <span class="hljs-string">&quot;label&quot;</span>: pred[<span class="hljs-string">&quot;label&quot;</span>]} <span class="hljs-keyword">for</span> pred <span class="hljs-keyword">in</span> preds]
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(*preds, sep=<span class="hljs-string">&quot;\n&quot;</span>)
{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9879</span>, <span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-string">&#x27;LABEL_184&#x27;</span>}
{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9973</span>, <span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-string">&#x27;snow&#x27;</span>}
{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9972</span>, <span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-string">&#x27;cat&#x27;</span>}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="depth-estimation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#depth-estimation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Depth estimation</span></h3> <p data-svelte-h="svelte-qofxu2">深度推定は、画像内の各ピクセルがカメラからの距離を予測します。このコンピュータビジョンタスクは、特にシーンの理解と再構築に重要です。たとえば、自動運転車では、歩行者、交通標識、他の車などの物体がどれだけ遠いかを理解し、障害物や衝突を回避するために必要です。深度情報はまた、2D画像から3D表現を構築し、生物学的構造や建物の高品質な3D表現を作成するのに役立ちます。</p> <p data-svelte-h="svelte-1npln10">深度推定には次の2つのアプローチがあります:</p> <ul data-svelte-h="svelte-1i4tid3"><li>ステレオ:深度は、わずかに異なる角度からの同じ画像の2つの画像を比較して推定されます。</li> <li>モノキュラー:深度は単一の画像から推定されます。</li></ul> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>depth_estimator = pipeline(task=<span class="hljs-string">&quot;depth-estimation&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = depth_estimator(
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg&quot;</span>
<span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="natural-language-processing" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#natural-language-processing"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Natural language processing</span></h2> <p data-svelte-h="svelte-1m59zxk">NLPタスクは、テキストが私たちにとって自然なコミュニケーション手段であるため、最も一般的なタスクの一つです。モデルが認識するための形式にテキストを変換するには、トークン化が必要です。これは、テキストのシーケンスを単語やサブワード(トークン)に分割し、それらのトークンを数字に変換することを意味します。その結果、テキストのシーケンスを数字のシーケンスとして表現し、一度数字のシーケンスがあれば、さまざまなNLPタスクを解決するためにモデルに入力できます!</p> <h3 class="relative group"><a id="text-classification" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#text-classification"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Text classification</span></h3> <p data-svelte-h="svelte-1a39rcj">どんなモダリティの分類タスクと同様に、テキスト分類は事前に定義されたクラスのセットからテキストのシーケンス(文レベル、段落、またはドキュメントであることがあります)にラベルを付けます。テキスト分類には多くの実用的な用途があり、その一部は次のとおりです:</p> <ul data-svelte-h="svelte-1wx0y2z"><li>感情分析:<code>positive</code><code>negative</code>のような極性に従ってテキストにラベルを付け、政治、金融、マーケティングなどの分野での意思決定をサポートします。</li> <li>コンテンツ分類:テキストをトピックに従ってラベル付けし、ニュースやソーシャルメディアのフィード内の情報を整理し、フィルタリングするのに役立ちます(<code>天気</code><code>スポーツ</code><code>金融</code>など)。</li></ul> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>classifier = pipeline(task=<span class="hljs-string">&quot;sentiment-analysis&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = classifier(<span class="hljs-string">&quot;Hugging Face is the best thing since sliced bread!&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = [{<span class="hljs-string">&quot;score&quot;</span>: <span class="hljs-built_in">round</span>(pred[<span class="hljs-string">&quot;score&quot;</span>], <span class="hljs-number">4</span>), <span class="hljs-string">&quot;label&quot;</span>: pred[<span class="hljs-string">&quot;label&quot;</span>]} <span class="hljs-keyword">for</span> pred <span class="hljs-keyword">in</span> preds]
<span class="hljs-meta">&gt;&gt;&gt; </span>preds
[{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9991</span>, <span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-string">&#x27;POSITIVE&#x27;</span>}]<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="token-classification" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#token-classification"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Token classification</span></h3> <p data-svelte-h="svelte-mrs7ie">どんなNLPタスクでも、テキストはテキストのシーケンスを個々の単語やサブワードに分割して前処理されます。これらは<a href="/glossary#token">トークン</a>として知られています。トークン分類は、事前に定義されたクラスのセットから各トークンにラベルを割り当てます。</p> <p data-svelte-h="svelte-l02yas">トークン分類の一般的なタイプは次の2つです:</p> <ul data-svelte-h="svelte-109mihx"><li>固有表現認識(NER):組織、人物、場所、日付などのエンティティのカテゴリに従ってトークンにラベルを付けます。NERは特にバイオメディカル環境で人気であり、遺伝子、タンパク質、薬物名などをラベル付けできます。</li> <li>品詞タグ付け(POS):名詞、動詞、形容詞などの品詞に従ってトークンにラベルを付けます。POSは、翻訳システムが同じ単語が文法的にどのように異なるかを理解するのに役立ちます(名詞としての「銀行」と動詞としての「銀行」など)。</li></ul> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>classifier = pipeline(task=<span class="hljs-string">&quot;ner&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = classifier(<span class="hljs-string">&quot;Hugging Face is a French company based in New York City.&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = [
<span class="hljs-meta">... </span> {
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;entity&quot;</span>: pred[<span class="hljs-string">&quot;entity&quot;</span>],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;score&quot;</span>: <span class="hljs-built_in">round</span>(pred[<span class="hljs-string">&quot;score&quot;</span>], <span class="hljs-number">4</span>),
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;index&quot;</span>: pred[<span class="hljs-string">&quot;index&quot;</span>],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;word&quot;</span>: pred[<span class="hljs-string">&quot;word&quot;</span>],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;start&quot;</span>: pred[<span class="hljs-string">&quot;start&quot;</span>],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;end&quot;</span>: pred[<span class="hljs-string">&quot;end&quot;</span>],
<span class="hljs-meta">... </span> }
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> pred <span class="hljs-keyword">in</span> preds
<span class="hljs-meta">... </span>]
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(*preds, sep=<span class="hljs-string">&quot;\n&quot;</span>)
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-ORG&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9968</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">1</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Hu&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">0</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">2</span>}
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-ORG&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9293</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">2</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;##gging&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">2</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">7</span>}
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-ORG&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9763</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">3</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;Face&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">8</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">12</span>}
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-MISC&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9983</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">6</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;French&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">18</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">24</span>}
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-LOC&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.999</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">10</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;New&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">42</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">45</span>}
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-LOC&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9987</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">11</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;York&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">46</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">50</span>}
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-LOC&#x27;</span>, <span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9992</span>, <span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">12</span>, <span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;City&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">51</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">55</span>}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="question-answering" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#question-answering"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Question answering</span></h3> <p data-svelte-h="svelte-edc05x">質問応答は、コンテキスト(オープンドメイン)を含む場合と含まない場合(クローズドドメイン)がある場合もある、別のトークンレベルのタスクで、質問に対する回答を返します。このタスクは、仮想アシスタントにレストランが営業しているかどうかのような質問をするときに発生します。また、顧客や技術サポートを提供し、検索エンジンがあなたが求めている関連情報を取得するのにも役立ちます。</p> <p data-svelte-h="svelte-1vhjwqv">質問応答の一般的なタイプは次の2つです:</p> <ul data-svelte-h="svelte-87xeto"><li>抽出型:質問と一部のコンテキストが与えられた場合、モデルがコンテキストから抽出する必要のあるテキストのスパンが回答となります。</li> <li>抽象的:質問と一部のコンテキストが与えられた場合、回答はコンテキストから生成されます。このアプローチは、<a href="/docs/transformers/pr_33913/ja/main_classes/pipelines#transformers.QuestionAnsweringPipeline">QuestionAnsweringPipeline</a>ではなく<a href="/docs/transformers/pr_33913/ja/main_classes/pipelines#transformers.Text2TextGenerationPipeline">Text2TextGenerationPipeline</a>で処理されます。</li></ul> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>question_answerer = pipeline(task=<span class="hljs-string">&quot;question-answering&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = question_answerer(
<span class="hljs-meta">... </span> question=<span class="hljs-string">&quot;What is the name of the repository?&quot;</span>,
<span class="hljs-meta">... </span> context=<span class="hljs-string">&quot;The name of the repository is huggingface/transformers&quot;</span>,
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(
<span class="hljs-meta">... </span> <span class="hljs-string">f&quot;score: <span class="hljs-subst">{<span class="hljs-built_in">round</span>(preds[<span class="hljs-string">&#x27;score&#x27;</span>], <span class="hljs-number">4</span>)}</span>, start: <span class="hljs-subst">{preds[<span class="hljs-string">&#x27;start&#x27;</span>]}</span>, end: <span class="hljs-subst">{preds[<span class="hljs-string">&#x27;end&#x27;</span>]}</span>, answer: <span class="hljs-subst">{preds[<span class="hljs-string">&#x27;answer&#x27;</span>]}</span>&quot;</span>
<span class="hljs-meta">... </span>)
score: <span class="hljs-number">0.9327</span>, start: <span class="hljs-number">30</span>, end: <span class="hljs-number">54</span>, answer: huggingface/transformers<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="summarization" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#summarization"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Summarization</span></h3> <p data-svelte-h="svelte-oh104g">要約は、長いテキストから短いバージョンを作成し、元の文書の意味の大部分を保ちながら試みるタスクです。要約はシーケンスからシーケンスへのタスクであり、入力よりも短いテキストシーケンスを出力します。要約を行うことで、読者が主要なポイントを迅速に理解できるようにするのに役立つ長文書がたくさんあります。法案、法的および財務文書、特許、科学論文など、読者の時間を節約し読書の支援となる文書の例があります。</p> <p data-svelte-h="svelte-1i939bz">質問応答と同様に、要約には2つのタイプがあります:</p> <ul data-svelte-h="svelte-9v9c4r"><li>抽出的要約:元のテキストから最も重要な文を識別して抽出します。</li> <li>抽象的要約:元のテキストからターゲットの要約(入力文書に含まれていない新しい単語を含むことがあります)を生成します。<a href="/docs/transformers/pr_33913/ja/main_classes/pipelines#transformers.SummarizationPipeline">SummarizationPipeline</a>は抽象的なアプローチを使用しています。</li></ul> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>summarizer = pipeline(task=<span class="hljs-string">&quot;summarization&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>summarizer(
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;In this work, we presented the Transformer, the first sequence transduction model based entirely on attention, replacing the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention. For translation tasks, the Transformer can be trained significantly faster than architectures based on recurrent or convolutional layers. On both WMT 2014 English-to-German and WMT 2014 English-to-French translation tasks, we achieve a new state of the art. In the former task our best model outperforms even all previously reported ensembles.&quot;</span>
<span class="hljs-meta">... </span>)
[{<span class="hljs-string">&#x27;summary_text&#x27;</span>: <span class="hljs-string">&#x27; The Transformer is the first sequence transduction model based entirely on attention . It replaces the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention . For translation tasks, the Transformer can be trained significantly faster than architectures based on recurrent or convolutional layers .&#x27;</span>}]<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="translation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#translation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Translation</span></h3> <p data-svelte-h="svelte-1eoa6ft">翻訳は、ある言語のテキストシーケンスを別の言語に変換する作業です。これは異なるバックグラウンドを持つ人々がコミュニケーションをとるのに役立ち、広範な観客にコンテンツを翻訳して伝えるのに役立ち、新しい言語を学ぶのを支援する学習ツールにもなります。要約と共に、翻訳はシーケンス間のタスクであり、モデルは入力シーケンスを受け取り、ターゲットの出力シーケンスを返します。</p> <p data-svelte-h="svelte-fyu2t">初期の翻訳モデルは主に単一言語でしたが、最近では多言語モデルに対する関心が高まり、多くの言語対で翻訳できるような多言語モデルに注目が集まっています。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>text = <span class="hljs-string">&quot;translate English to French: Hugging Face is a community-based open-source platform for machine learning.&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>translator = pipeline(task=<span class="hljs-string">&quot;translation&quot;</span>, model=<span class="hljs-string">&quot;google-t5/t5-small&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>translator(text)
[{<span class="hljs-string">&#x27;translation_text&#x27;</span>: <span class="hljs-string">&quot;Hugging Face est une tribune communautaire de l&#x27;apprentissage des machines.&quot;</span>}]<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="言語モデリング" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#言語モデリング"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>言語モデリング</span></h3> <p data-svelte-h="svelte-ts71z3">言語モデリングは、テキストのシーケンス内の単語を予測するタスクです。事前学習された言語モデルは、多くの他のダウンストリームタスクに対してファインチューニングできるため、非常に人気のあるNLPタスクとなっています。最近では、ゼロショットまたはフューショット学習を実証する大規模な言語モデル(LLM)に大きな関心が寄せられています。これは、モデルが明示的にトレーニングされていないタスクを解決できることを意味します!言語モデルは、流暢で説得力のあるテキストを生成するために使用できますが、テキストが常に正確であるわけではないため、注意が必要です。</p> <p data-svelte-h="svelte-ph2wi1">言語モデリングには2つのタイプがあります:</p> <ul><li><p data-svelte-h="svelte-e09crk">因果的:モデルの目標は、シーケンス内の次のトークンを予測することであり、将来のトークンはマスクされます。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>prompt = <span class="hljs-string">&quot;Hugging Face is a community-based open-source platform for machine learning.&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>generator = pipeline(task=<span class="hljs-string">&quot;text-generation&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>generator(prompt) <span class="hljs-comment"># doctest: +SKIP</span><!-- HTML_TAG_END --></pre></div></li> <li><p data-svelte-h="svelte-1vyw9g3">マスクされた:モデルの目的は、シーケンス内のトークン全体にアクセスしながら、シーケンス内のマスクされたトークンを予測することです。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>text = <span class="hljs-string">&quot;Hugging Face is a community-based open-source &lt;mask&gt; for machine learning.&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>fill_mask = pipeline(task=<span class="hljs-string">&quot;fill-mask&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = fill_mask(text, top_k=<span class="hljs-number">1</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = [
<span class="hljs-meta">... </span> {
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;score&quot;</span>: <span class="hljs-built_in">round</span>(pred[<span class="hljs-string">&quot;score&quot;</span>], <span class="hljs-number">4</span>),
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;token&quot;</span>: pred[<span class="hljs-string">&quot;token&quot;</span>],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;token_str&quot;</span>: pred[<span class="hljs-string">&quot;token_str&quot;</span>],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;sequence&quot;</span>: pred[<span class="hljs-string">&quot;sequence&quot;</span>],
<span class="hljs-meta">... </span> }
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> pred <span class="hljs-keyword">in</span> preds
<span class="hljs-meta">... </span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>preds
[{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.2236</span>,
<span class="hljs-string">&#x27;token&#x27;</span>: <span class="hljs-number">1761</span>,
<span class="hljs-string">&#x27;token_str&#x27;</span>: <span class="hljs-string">&#x27; platform&#x27;</span>,
<span class="hljs-string">&#x27;sequence&#x27;</span>: <span class="hljs-string">&#x27;Hugging Face is a community-based open-source platform for machine learning.&#x27;</span>}]<!-- HTML_TAG_END --></pre></div></li></ul> <h2 class="relative group"><a id="multimodal" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#multimodal"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Multimodal</span></h2> <p data-svelte-h="svelte-9np98f">マルチモーダルタスクは、特定の問題を解決するために複数のデータモダリティ(テキスト、画像、音声、ビデオ)を処理するためにモデルを必要とします。画像キャプショニングは、モデルが入力として画像を受け取り、画像を説明するテキストのシーケンスまたは画像のいくつかの特性を出力するマルチモーダルタスクの例です。</p> <p data-svelte-h="svelte-miuti3">マルチモーダルモデルは異なるデータタイプまたはモダリティで作業しますが、内部的には前処理ステップがモデルにすべてのデータタイプを埋め込み(データに関する意味のある情報を保持するベクトルまたは数字のリスト)に変換するのを支援します。画像キャプショニングのようなタスクでは、モデルは画像の埋め込みとテキストの埋め込みの間の関係を学習します。</p> <h3 class="relative group"><a id="document-question-answering" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#document-question-answering"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Document question answering</span></h3> <p data-svelte-h="svelte-1q7jq3c">ドキュメント質問応答は、ドキュメントからの自然言語の質問に答えるタスクです。テキストを入力とするトークンレベルの質問応答タスクとは異なり、ドキュメント質問応答はドキュメントの画像とそのドキュメントに関する質問を受け取り、答えを返します。ドキュメント質問応答は構造化されたドキュメントを解析し、それから重要な情報を抽出するために使用できます。以下の例では、レシートから合計金額とお釣りを抽出することができます。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> requests
<span class="hljs-meta">&gt;&gt;&gt; </span>url = <span class="hljs-string">&quot;https://huggingface.co/datasets/hf-internal-testing/example-documents/resolve/main/jpeg_images/2.jpg&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>image = Image.<span class="hljs-built_in">open</span>(requests.get(url, stream=<span class="hljs-literal">True</span>).raw)
<span class="hljs-meta">&gt;&gt;&gt; </span>doc_question_answerer = pipeline(<span class="hljs-string">&quot;document-question-answering&quot;</span>, model=<span class="hljs-string">&quot;magorshunov/layoutlm-invoices&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds = doc_question_answerer(
<span class="hljs-meta">... </span> question=<span class="hljs-string">&quot;What is the total amount?&quot;</span>,
<span class="hljs-meta">... </span> image=image,
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>preds
[{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.8531</span>, <span class="hljs-string">&#x27;answer&#x27;</span>: <span class="hljs-string">&#x27;17,000&#x27;</span>, <span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">4</span>, <span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">4</span>}]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ik2fe4">このページが各モダリティのタスクの種類とそれぞれの重要性についての追加の背景情報を提供できたことを願っています。次の <a href="tasks_explained">セクション</a> では、🤗 トランスフォーマーがこれらのタスクを解決するために <strong>どのように</strong> 動作するかを学びます。</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/ja/task_summary.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_17iw4ji = {
assets: "/docs/transformers/pr_33913/ja",
base: "/docs/transformers/pr_33913/ja",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/transformers/pr_33913/ja/_app/immutable/entry/start.17a8f5f1.js"),
import("/docs/transformers/pr_33913/ja/_app/immutable/entry/app.ec1e8c3e.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 134],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
88.1 kB
·
Xet hash:
527fe017281c9399617d98243f8a2731cc4f69a6cb18b5480eedb9bb7bf00189

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.