Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content='{"title":"Building Custom Blocks","local":"building-custom-blocks","sections":[{"title":"Project Structure","local":"project-structure","sections":[],"depth":2},{"title":"Quick Start with Template","local":"quick-start-with-template","sections":[{"title":"Download the template","local":"download-the-template","sections":[],"depth":3},{"title":"Edit locally","local":"edit-locally","sections":[],"depth":3},{"title":"Test your block","local":"test-your-block","sections":[],"depth":3},{"title":"Upload to the Hub","local":"upload-to-the-hub","sections":[],"depth":3}],"depth":2},{"title":"Example: Florence-2 Image Annotator","local":"example-florence-2-image-annotator","sections":[{"title":"Define components","local":"define-components","sections":[],"depth":3},{"title":"Define inputs and outputs","local":"define-inputs-and-outputs","sections":[],"depth":3},{"title":"Implement the __call__ method","local":"implement-the-call-method","sections":[],"depth":3}],"depth":2},{"title":"Using Custom Blocks","local":"using-custom-blocks","sections":[],"depth":2},{"title":"Editing custom blocks","local":"editing-custom-blocks","sections":[],"depth":2},{"title":"Next Steps","local":"next-steps","sections":[],"depth":2},{"title":"Dependencies","local":"dependencies","sections":[],"depth":2}],"depth":1}'> | |
| <link href="/docs/diffusers/pr_12652/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/entry/start.78b62fee.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/chunks/scheduler.53228c21.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/chunks/singletons.89d0b97a.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/chunks/index.e93d0901.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/chunks/paths.67f826e3.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/entry/app.062e1615.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/chunks/preload-helper.222e0275.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/chunks/index.100fac89.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/nodes/0.fe8af227.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/nodes/271.acf8f2cf.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/chunks/CopyLLMTxtMenu.50ab6782.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/chunks/globals.7f7f1b26.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/chunks/IconCopy.38cf8f56.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.720a8c3c.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/chunks/CodeBlock.d30a6509.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/pr_12652/en/_app/immutable/chunks/HfOption.fad27e59.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content='{"title":"Building Custom Blocks","local":"building-custom-blocks","sections":[{"title":"Project Structure","local":"project-structure","sections":[],"depth":2},{"title":"Quick Start with Template","local":"quick-start-with-template","sections":[{"title":"Download the template","local":"download-the-template","sections":[],"depth":3},{"title":"Edit locally","local":"edit-locally","sections":[],"depth":3},{"title":"Test your block","local":"test-your-block","sections":[],"depth":3},{"title":"Upload to the Hub","local":"upload-to-the-hub","sections":[],"depth":3}],"depth":2},{"title":"Example: Florence-2 Image Annotator","local":"example-florence-2-image-annotator","sections":[{"title":"Define components","local":"define-components","sections":[],"depth":3},{"title":"Define inputs and outputs","local":"define-inputs-and-outputs","sections":[],"depth":3},{"title":"Implement the __call__ method","local":"implement-the-call-method","sections":[],"depth":3}],"depth":2},{"title":"Using Custom Blocks","local":"using-custom-blocks","sections":[],"depth":2},{"title":"Editing custom blocks","local":"editing-custom-blocks","sections":[],"depth":2},{"title":"Next Steps","local":"next-steps","sections":[],"depth":2},{"title":"Dependencies","local":"dependencies","sections":[],"depth":2}],"depth":1}'><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 
dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="building-custom-blocks" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#building-custom-blocks"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 
1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Building Custom Blocks</span></h1> <p data-svelte-h="svelte-mze0yb"><a href="./pipeline_block">ModularPipelineBlocks</a> are the fundamental building blocks of a <a href="/docs/diffusers/pr_12652/en/api/modular_diffusers/pipeline#diffusers.ModularPipeline">ModularPipeline</a>. You can create custom blocks by defining their inputs, outputs, and computation logic. This guide demonstrates how to create and use a custom block.</p> <blockquote class="tip" data-svelte-h="svelte-b8wkoi"><p>Explore the <a href="https://huggingface.co/collections/diffusers/modular-diffusers-custom-blocks" rel="nofollow">Modular Diffusers Custom Blocks</a> collection for official custom blocks.</p></blockquote> <h2 class="relative group"><a id="project-structure" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#project-structure"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 
0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Project Structure</span></h2> <p data-svelte-h="svelte-vxw3lv">Your custom block project should use the following structure:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->. | |
| ├── block.py | |
| └── modular_config.json<!-- HTML_TAG_END --></pre></div> <ul data-svelte-h="svelte-lnhirc"><li><code>block.py</code> contains the custom block implementation</li> <li><code>modular_config.json</code> contains the metadata needed to load the block</li></ul> <h2 class="relative group"><a id="quick-start-with-template" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#quick-start-with-template"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Quick Start with Template</span></h2> <p data-svelte-h="svelte-16yvy8e">The fastest way to create a custom block is to start from our template. 
The template provides a pre-configured project structure with <code>block.py</code> and <code>modular_config.json</code> files, plus commented examples showing how to define components, inputs, outputs, and the <code>__call__</code> method—so you can focus on your custom logic instead of boilerplate setup.</p> <h3 class="relative group"><a id="download-the-template" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#download-the-template"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Download the template</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" 
transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> ModularPipelineBlocks | |
| model_id = <span class="hljs-string">"diffusers/custom-block-template"</span> | |
| local_dir = model_id.split(<span class="hljs-string">"/"</span>)[-<span class="hljs-number">1</span>] | |
| blocks = ModularPipelineBlocks.from_pretrained( | |
| model_id, | |
| trust_remote_code=<span class="hljs-literal">True</span>, | |
| local_dir=local_dir | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-f5x43k">This saves the template files to <code>custom-block-template/</code> locally. To save them somewhere else, pass a different path as <code>local_dir</code>.</p> <h3 class="relative group"><a id="edit-locally" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#edit-locally"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Edit locally</span></h3> <p data-svelte-h="svelte-n36as2">Open <code>block.py</code> and implement your custom block. The template includes commented examples showing how to define each property. 
See the <a href="#example-florence-2-image-annotator">Florence-2 example</a> below for a complete implementation.</p> <h3 class="relative group"><a id="test-your-block" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#test-your-block"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Test your block</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white 
py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> ModularPipelineBlocks | |
| blocks = ModularPipelineBlocks.from_pretrained(local_dir, trust_remote_code=<span class="hljs-literal">True</span>) | |
| pipeline = blocks.init_pipeline() | |
| output = pipeline(...) <span class="hljs-comment"># your inputs here</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="upload-to-the-hub" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#upload-to-the-hub"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Upload to the Hub</span></h3> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 
px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pipeline.save_pretrained(local_dir, repo_id=<span class="hljs-string">"your-username/your-block-name"</span>, push_to_hub=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="example-florence-2-image-annotator" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#example-florence-2-image-annotator"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Example: Florence-2 Image Annotator</span></h2> <p data-svelte-h="svelte-obq0ln">This example creates a custom block with <a href="https://huggingface.co/docs/transformers/model_doc/florence2" rel="nofollow">Florence-2</a> to process an input image and generate a mask for inpainting.</p> <h3 class="relative group"><a id="define-components" class="header-link block 
pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#define-components"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Define components</span></h3> <p data-svelte-h="svelte-ufbt34">Define the components the block needs, <code>Florence2ForConditionalGeneration</code> and its processor. 
When defining components, specify the <code>name</code> (how you’ll access it in code), <code>type_hint</code> (the model class), and <code>pretrained_model_name_or_path</code> (where to load weights from).</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Inside block.py</span> | |
| <span class="hljs-keyword">from</span> diffusers.modular_pipelines <span class="hljs-keyword">import</span> ModularPipelineBlocks, ComponentSpec | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoProcessor, Florence2ForConditionalGeneration | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">Florence2ImageAnnotatorBlock</span>(<span class="hljs-title class_ inherited__">ModularPipelineBlocks</span>): | |
| <span class="hljs-meta"> @property</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">expected_components</span>(<span class="hljs-params">self</span>): | |
| <span class="hljs-keyword">return</span> [ | |
| ComponentSpec( | |
| name=<span class="hljs-string">"image_annotator"</span>, | |
| type_hint=Florence2ForConditionalGeneration, | |
| pretrained_model_name_or_path=<span class="hljs-string">"florence-community/Florence-2-base-ft"</span>, | |
| ), | |
| ComponentSpec( | |
| name=<span class="hljs-string">"image_annotator_processor"</span>, | |
| type_hint=AutoProcessor, | |
| pretrained_model_name_or_path=<span class="hljs-string">"florence-community/Florence-2-base-ft"</span>, | |
| ), | |
| ]<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="define-inputs-and-outputs" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#define-inputs-and-outputs"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Define inputs and outputs</span></h3> <p data-svelte-h="svelte-1xobxrd">Inputs include the image, annotation task, and prompt. 
Outputs include the generated mask and annotations.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> typing <span class="hljs-keyword">import</span> <span class="hljs-type">List</span>, <span class="hljs-type">Union</span> | |
| <span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-keyword">from</span> diffusers.modular_pipelines <span class="hljs-keyword">import</span> InputParam, OutputParam | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">Florence2ImageAnnotatorBlock</span>(<span class="hljs-title class_ inherited__">ModularPipelineBlocks</span>): | |
| <span class="hljs-comment"># ... expected_components from above ...</span> | |
| <span class="hljs-meta"> @property</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">inputs</span>(<span class="hljs-params">self</span>) -> <span class="hljs-type">List</span>[InputParam]: | |
| <span class="hljs-keyword">return</span> [ | |
| InputParam( | |
| <span class="hljs-string">"image"</span>, | |
| type_hint=<span class="hljs-type">Union</span>[Image.Image, <span class="hljs-type">List</span>[Image.Image]], | |
| required=<span class="hljs-literal">True</span>, | |
| description=<span class="hljs-string">"Image(s) to annotate"</span>, | |
| ), | |
| InputParam( | |
| <span class="hljs-string">"annotation_task"</span>, | |
| type_hint=<span class="hljs-built_in">str</span>, | |
| default=<span class="hljs-string">"&lt;REFERRING_EXPRESSION_SEGMENTATION&gt;"</span>, | |
| description=<span class="hljs-string">"Annotation task to perform (e.g., &lt;OD&gt;, &lt;CAPTION&gt;, &lt;REFERRING_EXPRESSION_SEGMENTATION&gt;)"</span>, | |
| ), | |
| InputParam( | |
| <span class="hljs-string">"annotation_prompt"</span>, | |
| type_hint=<span class="hljs-built_in">str</span>, | |
| required=<span class="hljs-literal">True</span>, | |
| description=<span class="hljs-string">"Prompt to provide context for the annotation task"</span>, | |
| ), | |
| InputParam( | |
| <span class="hljs-string">"annotation_output_type"</span>, | |
| type_hint=<span class="hljs-built_in">str</span>, | |
| default=<span class="hljs-string">"mask_image"</span>, | |
| description=<span class="hljs-string">"Output type: 'mask_image', 'mask_overlay', or 'bounding_box'"</span>, | |
| ), | |
| ] | |
| <span class="hljs-meta"> @property</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">intermediate_outputs</span>(<span class="hljs-params">self</span>) -> <span class="hljs-type">List</span>[OutputParam]: | |
| <span class="hljs-keyword">return</span> [ | |
| OutputParam( | |
| <span class="hljs-string">"mask_image"</span>, | |
| type_hint=Image.Image, | |
| description=<span class="hljs-string">"Inpainting mask for the input image"</span>, | |
| ), | |
| OutputParam( | |
| <span class="hljs-string">"annotations"</span>, | |
| type_hint=<span class="hljs-built_in">dict</span>, | |
| description=<span class="hljs-string">"Raw annotation predictions"</span>, | |
| ), | |
| OutputParam( | |
| <span class="hljs-string">"image"</span>, | |
| type_hint=Image.Image, | |
| description=<span class="hljs-string">"Annotated image"</span>, | |
| ), | |
| ]<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="implement-the-call-method" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#implement-the-call-method"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Implement the __call__ method</span></h3> <p data-svelte-h="svelte-19nz9wn">The <code>__call__</code> method contains the block’s logic. 
Access inputs via <code>block_state</code>, run your computation, and set outputs back to <code>block_state</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers.modular_pipelines <span class="hljs-keyword">import</span> PipelineState | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">Florence2ImageAnnotatorBlock</span>(<span class="hljs-title class_ inherited__">ModularPipelineBlocks</span>): | |
| <span class="hljs-comment"># ... expected_components, inputs, intermediate_outputs from above ...</span> | |
| <span class="hljs-meta"> @torch.no_grad()</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">__call__</span>(<span class="hljs-params">self, components, state: PipelineState</span>) -> PipelineState: | |
| block_state = self.get_block_state(state) | |
| images, annotation_task_prompt = self.prepare_inputs( | |
| block_state.image, block_state.annotation_prompt | |
| ) | |
| task = block_state.annotation_task | |
| fill = block_state.fill | |
| annotations = self.get_annotations( | |
| components, images, annotation_task_prompt, task | |
| ) | |
| block_state.annotations = annotations | |
| <span class="hljs-keyword">if</span> block_state.annotation_output_type == <span class="hljs-string">"mask_image"</span>: | |
| block_state.mask_image = self.prepare_mask(images, annotations) | |
| <span class="hljs-keyword">else</span>: | |
| block_state.mask_image = <span class="hljs-literal">None</span> | |
| <span class="hljs-keyword">if</span> block_state.annotation_output_type == <span class="hljs-string">"mask_overlay"</span>: | |
| block_state.image = self.prepare_mask(images, annotations, overlay=<span class="hljs-literal">True</span>, fill=fill) | |
| <span class="hljs-keyword">elif</span> block_state.annotation_output_type == <span class="hljs-string">"bounding_box"</span>: | |
| block_state.image = self.prepare_bounding_boxes(images, annotations) | |
| self.set_block_state(state, block_state) | |
| <span class="hljs-keyword">return</span> components, state | |
| <span class="hljs-comment"># Helper methods for mask/bounding box generation...</span><!-- HTML_TAG_END --></pre></div> <blockquote class="tip" data-svelte-h="svelte-ew7kh0"><p>See the complete implementation at <a href="https://huggingface.co/diffusers/Florence2-image-Annotator" rel="nofollow">diffusers/Florence2-image-Annotator</a>.</p></blockquote> <h2 class="relative group"><a id="using-custom-blocks" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-custom-blocks"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using Custom Blocks</span></h2> <p data-svelte-h="svelte-1rm7le4">Load a custom block with <a href="/docs/diffusers/pr_12652/en/api/modular_diffusers/pipeline#diffusers.ModularPipeline.from_pretrained">from_pretrained()</a> and set <code>trust_remote_code=True</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" 
aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> ModularPipeline | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image | |
| <span class="hljs-comment"># Load the Florence-2 annotator pipeline</span> | |
| image_annotator = ModularPipeline.from_pretrained( | |
| <span class="hljs-string">"diffusers/Florence2-image-Annotator"</span>, | |
| trust_remote_code=<span class="hljs-literal">True</span> | |
| ) | |
| <span class="hljs-comment"># Check the docstring to see inputs/outputs</span> | |
| <span class="hljs-built_in">print</span>(image_annotator.blocks.doc)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1n0k2bg">Use the block to generate a mask:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->image_annotator.load_components(torch_dtype=torch.bfloat16) | |
| image_annotator.to(<span class="hljs-string">"cuda"</span>) | |
| image = load_image(<span class="hljs-string">"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg"</span>) | |
| image = image.resize((<span class="hljs-number">1024</span>, <span class="hljs-number">1024</span>)) | |
| prompt = [<span class="hljs-string">"A red car"</span>] | |
| annotation_task = <span class="hljs-string">"<REFERRING_EXPRESSION_SEGMENTATION>"</span> | |
| annotation_prompt = [<span class="hljs-string">"the car"</span>] | |
| mask_image = image_annotator( | |
| prompt=prompt, | |
| image=image, | |
| annotation_task=annotation_task, | |
| annotation_prompt=annotation_prompt, | |
| annotation_output_type=<span class="hljs-string">"mask_image"</span>, | |
| ).images | |
| mask_image[<span class="hljs-number">0</span>].save(<span class="hljs-string">"car-mask.png"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1cov9wt">Compose it with other blocks to create a new pipeline:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Get the annotator block</span> | |
| annotator_block = image_annotator.blocks | |
| <span class="hljs-comment"># Get an inpainting workflow and insert the annotator at the beginning</span> | |
| inpaint_blocks = ModularPipeline.from_pretrained(<span class="hljs-string">"Qwen/Qwen-Image"</span>).blocks.get_workflow(<span class="hljs-string">"inpainting"</span>) | |
| inpaint_blocks.sub_blocks.insert(<span class="hljs-string">"image_annotator"</span>, annotator_block, <span class="hljs-number">0</span>) | |
| <span class="hljs-comment"># Initialize the combined pipeline</span> | |
| pipe = inpaint_blocks.init_pipeline() | |
| pipe.load_components(torch_dtype=torch.float16, device=<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># Now the pipeline automatically generates masks from prompts</span> | |
| output = pipe( | |
| prompt=prompt, | |
| image=image, | |
| annotation_task=annotation_task, | |
| annotation_prompt=annotation_prompt, | |
| annotation_output_type=<span class="hljs-string">"mask_image"</span>, | |
| num_inference_steps=<span class="hljs-number">35</span>, | |
| guidance_scale=<span class="hljs-number">7.5</span>, | |
| strength=<span class="hljs-number">0.95</span>, | |
| output=<span class="hljs-string">"images"</span> | |
| ) | |
| output[<span class="hljs-number">0</span>].save(<span class="hljs-string">"florence-inpainting.png"</span>)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="editing-custom-blocks" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#editing-custom-blocks"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Editing custom blocks</span></h2> <p data-svelte-h="svelte-1b8ba8x">Edit a custom block by downloading it locally. 
This is the same workflow as the <a href="#quick-start-with-template">Quick Start with Template</a>, but starting from an existing block instead of the template.</p> <p data-svelte-h="svelte-1si0b3r">Use the <code>local_dir</code> argument to download a custom block to a specific folder:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> ModularPipelineBlocks | |
| <span class="hljs-comment"># Download to a local folder for editing</span> | |
| annotator_block = ModularPipelineBlocks.from_pretrained( | |
| <span class="hljs-string">"diffusers/Florence2-image-Annotator"</span>, | |
| trust_remote_code=<span class="hljs-literal">True</span>, | |
| local_dir=<span class="hljs-string">"./my-florence-block"</span> | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-yy2vde">Any changes made to the block files in this folder will be reflected when you load the block again. When you’re ready to share your changes, upload to a new repository:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pipeline = annotator_block.init_pipeline() | |
| pipeline.save_pretrained(<span class="hljs-string">"./my-florence-block"</span>, repo_id=<span class="hljs-string">"your-username/my-custom-florence"</span>, push_to_hub=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="next-steps" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#next-steps"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Next Steps</span></h2> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">Learn block types </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Use in Mellon </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">Explore existing blocks </div></div> <div 
class="language-select"><p data-svelte-h="svelte-60nswh">This guide covered creating a single custom block. Learn how to compose multiple blocks together:</p> <ul data-svelte-h="svelte-wg1sg7"><li><a href="./sequential_pipeline_blocks">SequentialPipelineBlocks</a>: Chain blocks to execute in sequence</li> <li><a href="./auto_pipeline_blocks">ConditionalPipelineBlocks</a>: Create conditional blocks that select different execution paths</li> <li><a href="./loop_sequential_pipeline_blocks">LoopSequentialPipelineBlocks</a>: Define iterative workflows like the denoising loop</li></ul> </div> <h2 class="relative group"><a id="dependencies" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dependencies"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dependencies</span></h2> <p data-svelte-h="svelte-lqsjnx">Declaring package dependencies in custom blocks prevents runtime import errors later on. 
Diffusers validates the dependencies and returns a warning if a package is missing or incompatible.</p> <p data-svelte-h="svelte-1g2dreo">Set a <code>_requirements</code> attribute in your block class, mapping package names to version specifiers.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers.modular_pipelines <span class="hljs-keyword">import</span> PipelineBlock | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">MyCustomBlock</span>(<span class="hljs-title class_ inherited__">PipelineBlock</span>): | |
| _requirements = { | |
| <span class="hljs-string">"transformers"</span>: <span class="hljs-string">">=4.44.0"</span>, | |
| <span class="hljs-string">"sentencepiece"</span>: <span class="hljs-string">">=0.2.0"</span> | |
| }<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-wex2kd">When there are blocks with different requirements, Diffusers merges their requirements.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers.modular_pipelines <span class="hljs-keyword">import</span> SequentialPipelineBlocks | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">BlockA</span>(<span class="hljs-title class_ inherited__">PipelineBlock</span>): | |
| _requirements = {<span class="hljs-string">"transformers"</span>: <span class="hljs-string">">=4.44.0"</span>} | |
| <span class="hljs-comment"># ...</span> | |
| <span class="hljs-keyword">class</span> <span class="hljs-title class_">BlockB</span>(<span class="hljs-title class_ inherited__">PipelineBlock</span>): | |
| _requirements = {<span class="hljs-string">"sentencepiece"</span>: <span class="hljs-string">">=0.2.0"</span>} | |
| <span class="hljs-comment"># ...</span> | |
| pipe = SequentialPipelineBlocks.from_blocks_dict({ | |
| <span class="hljs-string">"block_a"</span>: BlockA, | |
| <span class="hljs-string">"block_b"</span>: BlockB, | |
| })<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-sdmkei">When this block is saved with <a href="/docs/diffusers/pr_12652/en/api/modular_diffusers/pipeline#diffusers.ModularPipeline.save_pretrained">save_pretrained()</a>, the requirements are saved to the <code>modular_config.json</code> file. When this block is loaded, Diffusers checks each requirement against the current environment. If there is a mismatch or a package isn’t found, Diffusers returns the following warning.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-section"># missing package</span> | |
| xyz-package was specified in the requirements but wasn't found in the current environment. | |
| <span class="hljs-section"># version mismatch</span> | |
| xyz requirement 'specific-version' is not satisfied by the installed version 'actual-version'. Things might work unexpected.<!-- HTML_TAG_END --></pre></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/diffusers/blob/main/docs/source/en/modular_diffusers/custom_blocks.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_1te7xiv = { | |
| assets: "/docs/diffusers/pr_12652/en", | |
| base: "/docs/diffusers/pr_12652/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/diffusers/pr_12652/en/_app/immutable/entry/start.78b62fee.js"), | |
| import("/docs/diffusers/pr_12652/en/_app/immutable/entry/app.062e1615.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 271], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 64.7 kB
- Xet hash:
- eec4829369df5a7f428321a430e29aaea68eab83d3054b2a92aa89e1835c739c
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.