import gradio as gr
import torch
from PIL import Image
from torchvision import transforms

from diffusers import StableDiffusionImageVariationPipeline


def main(
    input_im,
    scale=3.0,
    n_samples=4,
    steps=25,
    seed=0,
):
    # `device` and `pipe` are module-level globals defined below.
    generator = torch.Generator(device=device).manual_seed(int(seed))

    # Preprocess to match the CLIP image encoder: bicubic 224x224 resize and
    # CLIP's normalization statistics. antialias=False is deliberate; see the
    # model card's notes on the expected preprocessing.
    tform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(
            (224, 224),
            interpolation=transforms.InterpolationMode.BICUBIC,
            antialias=False,
        ),
        transforms.Normalize(
            [0.48145466, 0.4578275, 0.40821073],
            [0.26862954, 0.26130258, 0.27577711],
        ),
    ])
    inp = tform(input_im).to(device)

    # tile() prepends a batch dimension, turning the single image into a batch
    # of n_samples identical inputs, so the pipeline returns n_samples variations.
    images_list = pipe(
        inp.tile(n_samples, 1, 1, 1),
        guidance_scale=scale,
        num_inference_steps=steps,
        generator=generator,
    )

    # Replace any output flagged by the safety checker with a placeholder image.
    images = []
    for i, image in enumerate(images_list["images"]):
        if images_list["nsfw_content_detected"][i]:
            safe_image = Image.open(r"unsafe.png")
            images.append(safe_image)
        else:
            images.append(image)
    return images


description = \
"""
__Now using Image Variations v2!__

Generate variations on an input image using a fine-tuned version of Stable Diffusion.
Trained by [Justin Pinkney](https://www.justinpinkney.com) ([@Buntworthy](https://twitter.com/Buntworthy)) at [Lambda](https://lambdalabs.com/)

This version has been ported to the 🤗 Diffusers library; see the [Lambda Diffusers repo](https://github.com/LambdaLabsML/lambda-diffusers) for more details on how to use it.
For the original training code see [this repo](https://github.com/justinpinkney/stable-diffusion).

![](https://raw.githubusercontent.com/justinpinkney/stable-diffusion/main/assets/im-vars-thin.jpg)

"""


article = \
"""
## How does this work?

The normal Stable Diffusion model is trained to be conditioned on text input. In this version the original text encoder (from CLIP) has been removed and replaced with
the CLIP _image_ encoder instead. So instead of generating images based on a text input, images are generated to match CLIP's embedding of an input image.
The results have roughly the same style and content as the input but different details; in particular, the composition is generally quite different.
This is a totally different approach from the img2img script of the original Stable Diffusion and gives very different results.

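If you want to try the model outside this demo, here is a minimal sketch using the 🤗 Diffusers pipeline directly (the filenames are just placeholders):

```python
from PIL import Image
from diffusers import StableDiffusionImageVariationPipeline

pipe = StableDiffusionImageVariationPipeline.from_pretrained(
    "lambdalabs/sd-image-variations-diffusers",
)
im = Image.open("input.jpg")  # placeholder path to any input image
out = pipe(im, guidance_scale=3.0)
out.images[0].save("variation.jpg")
```
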
The model was fine-tuned on the [LAION aesthetics v2 6+ dataset](https://laion.ai/blog/laion-aesthetics/) to accept the new conditioning.
Training was done on 8xA100 GPUs on [Lambda GPU Cloud](https://lambdalabs.com/service/gpu-cloud).
More details are on the [model card](https://huggingface.co/lambdalabs/sd-image-variations-diffusers).
"""


device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = StableDiffusionImageVariationPipeline.from_pretrained(
    "lambdalabs/sd-image-variations-diffusers",
)
pipe = pipe.to(device)
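# A possible optimization (not enabled here): on a CUDA device the pipeline can
# be loaded in half precision to reduce memory use, at the cost of slightly
# different outputs. A sketch:
#
# pipe = StableDiffusionImageVariationPipeline.from_pretrained(
#     "lambdalabs/sd-image-variations-diffusers", torch_dtype=torch.float16
# ).to("cuda")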


inputs = [
    gr.Image(),
    gr.Slider(0, 25, value=3, step=1, label="Guidance scale"),
    gr.Slider(1, 4, value=1, step=1, label="Number images"),
    gr.Slider(5, 50, value=25, step=5, label="Steps"),
    gr.Number(0, label="Seed", precision=0),
]
output = gr.Gallery(label="Generated variations")
output.style(grid=2)


examples = [
    ["examples/vermeer.jpg", 3, 1, 25, 0],
    ["examples/matisse.jpg", 3, 1, 25, 0],
]


demo = gr.Interface(
    fn=main,
    title="Stable Diffusion Image Variations",
    description=description,
    article=article,
    inputs=inputs,
    outputs=output,
    examples=examples,
)
demo.launch()