| | |
| | |
| |
|
| | from cog import BasePredictor, Input, Path |
| | import os |
| | from subprocess import call |
| | from cldm.model import create_model, load_state_dict |
| | from ldm.models.diffusion.ddim import DDIMSampler |
| | from PIL import Image |
| | import numpy as np |
| | from typing import List |
| | from utils import get_state_dict_path, download_model, model_dl_urls, annotator_dl_urls |
| |
|
# Which ControlNet conditioning this container serves. The matching
# `process_*` helper is imported lazily below so only the annotator stack
# for the selected model type gets pulled in at startup.
MODEL_TYPE = "openpose"

# Conditional import: exactly one `process_*` symbol is bound at module
# scope, matching MODEL_TYPE. The other branches are dead for this build
# but kept so the same file can be rebuilt for any model type by editing
# the constant above.
if MODEL_TYPE == "canny":
    from gradio_canny2image import process_canny
elif MODEL_TYPE == "depth":
    from gradio_depth2image import process_depth
elif MODEL_TYPE == "hed":
    from gradio_hed2image import process_hed
elif MODEL_TYPE == "normal":
    from gradio_normal2image import process_normal
elif MODEL_TYPE == "mlsd":
    from gradio_hough2image import process_mlsd
elif MODEL_TYPE == "scribble":
    from gradio_scribble2image import process_scribble
elif MODEL_TYPE == "seg":
    from gradio_seg2image import process_seg
elif MODEL_TYPE == "openpose":
    from gradio_pose2image import process_pose
| |
|
class Predictor(BasePredictor):
    """Cog predictor wrapping a ControlNet pipeline selected by MODEL_TYPE."""

    def setup(self):
        """Load the model into memory to make running multiple predictions efficient"""
        # SD v1.5 + ControlNet architecture; the weights for the selected
        # control type are loaded on top of it.
        self.model = create_model('./models/cldm_v15.yaml').cuda()
        self.model.load_state_dict(load_state_dict(get_state_dict_path(MODEL_TYPE), location='cuda'))
        self.ddim_sampler = DDIMSampler(self.model)

    def predict(
        self,
        image: Path = Input(description="Input image"),
        prompt: str = Input(description="Prompt for the model"),
        num_samples: str = Input(
            description="Number of samples (higher values may OOM)",
            choices=['1', '4'],
            default='1'
        ),
        image_resolution: str = Input(
            description="Image resolution to be generated",
            choices=['256', '512', '768'],
            default='512'
        ),
        low_threshold: int = Input(description="Canny line detection low threshold", default=100, ge=1, le=255),
        high_threshold: int = Input(description="Canny line detection high threshold", default=200, ge=1, le=255),
        ddim_steps: int = Input(description="Steps", default=20),
        scale: float = Input(description="Scale for classifier-free guidance", default=9.0, ge=0.1, le=30.0),
        seed: int = Input(description="Seed", default=None),
        eta: float = Input(description="Controls the amount of noise that is added to the input data during the denoising diffusion process. Higher value -> more noise", default=0.0),
        a_prompt: str = Input(description="Additional text to be appended to prompt", default="best quality, extremely detailed"),
        n_prompt: str = Input(description="Negative Prompt", default="longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality"),
        detect_resolution: int = Input(description="Resolution at which detection method will be applied", default=512, ge=128, le=1024),
        # BUG FIX: the three thresholds below were referenced in the "normal"
        # and "mlsd" branches but never defined, raising NameError for those
        # model types. Added as inputs with defaults matching the upstream
        # ControlNet gradio demos (backward-compatible: defaults preserve the
        # signature for existing callers).
        bg_threshold: float = Input(description="Background threshold (normal model only)", default=0.4, ge=0.0, le=1.0),
        value_threshold: float = Input(description="Hough value threshold (mlsd model only)", default=0.1, ge=0.01, le=2.0),
        distance_threshold: float = Input(description="Hough distance threshold (mlsd model only)", default=0.1, ge=0.01, le=20.0),
    ) -> List[Path]:
        """Run a single prediction on the model"""
        # cog delivers choice inputs as strings; the process_* helpers expect ints.
        num_samples = int(num_samples)
        image_resolution = int(image_resolution)
        # NOTE: a seed of 0 is treated as "unset" and replaced with a random
        # seed (preserves original behavior).
        if not seed:
            seed = np.random.randint(1000000)
        else:
            seed = int(seed)

        # NOTE(review): assumes the input decodes to a 3-channel RGB image;
        # an RGBA or grayscale file would reach the detectors unconverted —
        # confirm against the process_* helpers' expectations.
        input_image = np.array(Image.open(image))

        if MODEL_TYPE == "canny":
            outputs = process_canny(
                input_image,
                prompt,
                a_prompt,
                n_prompt,
                num_samples,
                image_resolution,
                ddim_steps,
                scale,
                seed,
                eta,
                low_threshold,
                high_threshold,
                self.model,
                self.ddim_sampler,
            )
        elif MODEL_TYPE == "depth":
            outputs = process_depth(
                input_image,
                prompt,
                a_prompt,
                n_prompt,
                num_samples,
                image_resolution,
                detect_resolution,
                ddim_steps,
                scale,
                seed,
                eta,
                self.model,
                self.ddim_sampler,
            )
        elif MODEL_TYPE == "hed":
            outputs = process_hed(
                input_image,
                prompt,
                a_prompt,
                n_prompt,
                num_samples,
                image_resolution,
                detect_resolution,
                ddim_steps,
                scale,
                seed,
                eta,
                self.model,
                self.ddim_sampler,
            )
        elif MODEL_TYPE == "normal":
            outputs = process_normal(
                input_image,
                prompt,
                a_prompt,
                n_prompt,
                num_samples,
                image_resolution,
                ddim_steps,
                scale,
                seed,
                eta,
                bg_threshold,
                self.model,
                self.ddim_sampler,
            )
        elif MODEL_TYPE == "mlsd":
            outputs = process_mlsd(
                input_image,
                prompt,
                a_prompt,
                n_prompt,
                num_samples,
                image_resolution,
                detect_resolution,
                ddim_steps,
                scale,
                seed,
                eta,
                value_threshold,
                distance_threshold,
                self.model,
                self.ddim_sampler,
            )
        elif MODEL_TYPE == "scribble":
            outputs = process_scribble(
                input_image,
                prompt,
                a_prompt,
                n_prompt,
                num_samples,
                image_resolution,
                ddim_steps,
                scale,
                seed,
                eta,
                self.model,
                self.ddim_sampler,
            )
        elif MODEL_TYPE == "seg":
            outputs = process_seg(
                input_image,
                prompt,
                a_prompt,
                n_prompt,
                num_samples,
                image_resolution,
                detect_resolution,
                ddim_steps,
                scale,
                seed,
                eta,
                self.model,
                self.ddim_sampler,
            )
        elif MODEL_TYPE == "openpose":
            outputs = process_pose(
                input_image,
                prompt,
                a_prompt,
                n_prompt,
                num_samples,
                image_resolution,
                detect_resolution,
                ddim_steps,
                scale,
                seed,
                eta,
                self.model,
                self.ddim_sampler,
            )
        else:
            # BUG FIX: previously an unknown MODEL_TYPE fell through and
            # crashed with NameError on `outputs`; fail explicitly instead.
            raise ValueError(f"Unsupported MODEL_TYPE: {MODEL_TYPE}")

        # BUG FIX: the output directory may not exist inside the container.
        os.makedirs("tmp", exist_ok=True)
        # BUG FIX: the original rebound `outputs` to the (all-None) results of
        # Image.save() and then rebuilt the paths from range(len(...));
        # save each array and collect its path in a single pass instead.
        result_paths = []
        for i, output in enumerate(outputs):
            out_path = f"tmp/output_{i}.png"
            Image.fromarray(output).save(out_path)
            result_paths.append(Path(out_path))
        return result_paths
| |
|