Spaces:

XciD
/

tile-visualizer-api

Sleeping

App Files Files Community

tile-visualizer-api / app.py

XciD HF Staff

feat: add AI render mode with SDXL + ControlNet Tile inpainting

9a8a023 unverified 22 days ago

raw

history blame contribute delete

6.57 kB

	import spaces
	import numpy as np
	import torch
	import gradio as gr
	from PIL import Image, ImageDraw
	from transformers import (
	AutoImageProcessor,
	Mask2FormerForUniversalSegmentation,
	AutoModelForDepthEstimation,
	)
	from diffusers import (
	StableDiffusionXLControlNetInpaintPipeline,
	ControlNetModel,
	AutoencoderKL,
	)

	# ─── Segmentation + Depth models ─────────────────

	seg_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-large-ade-semantic")
	seg_model = Mask2FormerForUniversalSegmentation.from_pretrained(
	"facebook/mask2former-swin-large-ade-semantic"
	)

	FLOOR_KEYWORDS = {'floor', 'flooring', 'rug', 'carpet', 'mat'}
	FLOOR_IDS = set()
	id2label = seg_model.config.id2label
	for idx, label in id2label.items():
	if any(kw in label.lower() for kw in FLOOR_KEYWORDS):
	FLOOR_IDS.add(int(idx))
	print(f"Floor class: {idx} = {label}")
	if not FLOOR_IDS:
	FLOOR_IDS = {3, 28}

	depth_processor = AutoImageProcessor.from_pretrained("depth-anything/Depth-Anything-V2-Large-hf")
	depth_model = AutoModelForDepthEstimation.from_pretrained(
	"depth-anything/Depth-Anything-V2-Large-hf", torch_dtype=torch.float16
	)

	# ─── SDXL + ControlNet Tile for AI rendering ─────

	print("Loading ControlNet Tile + SDXL inpainting pipeline...")
	controlnet = ControlNetModel.from_pretrained(
	"xinsir/controlnet-tile-sdxl-1.0",
	torch_dtype=torch.float16,
	)
	vae = AutoencoderKL.from_pretrained(
	"madebyollin/sdxl-vae-fp16-fix",
	torch_dtype=torch.float16,
	)
	inpaint_pipe = StableDiffusionXLControlNetInpaintPipeline.from_pretrained(
	"diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
	controlnet=controlnet,
	vae=vae,
	torch_dtype=torch.float16,
	variant="fp16",
	)
	inpaint_pipe.enable_model_cpu_offload()
	print("Pipeline loaded.")


	@spaces.GPU(duration=60)
	@torch.inference_mode()
	def predict(image):
	if image is None:
	raise gr.Error("No image provided")

	orig_w, orig_h = image.size
	max_size = 1024
	scale = min(1.0, max_size / max(orig_w, orig_h))
	proc_w, proc_h = int(orig_w * scale), int(orig_h * scale)
	image_resized = image.resize((proc_w, proc_h), Image.LANCZOS)

	device = seg_model.device

	seg_inputs = seg_processor(images=image_resized, return_tensors="pt")
	seg_inputs = {k: v.to(device) for k, v in seg_inputs.items()}
	seg_outputs = seg_model(**seg_inputs)
	seg_result = seg_processor.post_process_semantic_segmentation(
	seg_outputs, target_sizes=[(proc_h, proc_w)]
	)[0]

	seg_map = seg_result.cpu().numpy()
	floor_mask = np.zeros((proc_h, proc_w), dtype=np.uint8)
	unique_classes = np.unique(seg_map)
	print(f"Detected classes: {[(int(c), id2label.get(c, '?')) for c in unique_classes]}")
	for class_id in FLOOR_IDS:
	floor_mask[seg_map == class_id] = 255

	mask_img = Image.fromarray(floor_mask).resize((orig_w, orig_h), Image.NEAREST)

	depth_inputs = depth_processor(images=image_resized, return_tensors="pt")
	depth_inputs = {k: v.to(device, dtype=torch.float16) if v.is_floating_point() else v.to(device) for k, v in depth_inputs.items()}
	depth_outputs = depth_model(**depth_inputs)
	depth_map = depth_outputs.predicted_depth.squeeze().cpu().numpy()

	depth_min, depth_max = depth_map.min(), depth_map.max()
	if depth_max - depth_min > 0:
	depth_norm = ((depth_map - depth_min) / (depth_max - depth_min) * 255).astype(np.uint8)
	else:
	depth_norm = np.zeros_like(depth_map, dtype=np.uint8)

	depth_img = Image.fromarray(depth_norm).resize((orig_w, orig_h), Image.BILINEAR)

	return mask_img, depth_img


	def create_tiled_control_image(tile_texture, width, height):
	"""Tile the texture image to fill width x height."""
	tw, th = tile_texture.size
	control = Image.new("RGB", (width, height))
	for y in range(0, height, th):
	for x in range(0, width, tw):
	control.paste(tile_texture, (x, y))
	return control


	@spaces.GPU(duration=120)
	@torch.inference_mode()
	def render_ai(room_image, tile_texture):
	if room_image is None or tile_texture is None:
	raise gr.Error("Room image and tile texture are required")

	# Step 1: Get floor mask
	mask_img, _ = predict.__wrapped__(room_image)

	# Resize everything to 1024x1024 for SDXL
	size = 1024
	room_resized = room_image.resize((size, size), Image.LANCZOS)
	mask_resized = mask_img.resize((size, size), Image.NEAREST)

	# Step 2: Create tiled control image from tile texture
	tile_size = max(64, size // 8)
	tile_resized = tile_texture.resize((tile_size, tile_size), Image.LANCZOS)
	control_image = create_tiled_control_image(tile_resized, size, size)

	# Step 3: Run SDXL inpainting with ControlNet Tile
	result = inpaint_pipe(
	prompt="ceramic tile floor, tiled floor with repeating pattern, interior design photo, photorealistic",
	negative_prompt="blurry, distorted, low quality, watermark, text",
	image=room_resized,
	mask_image=mask_resized,
	control_image=control_image,
	num_inference_steps=25,
	guidance_scale=7.0,
	controlnet_conditioning_scale=0.9,
	strength=0.95,
	generator=torch.Generator(device="cuda").manual_seed(42),
	).images[0]

	# Resize back to original dimensions
	result = result.resize((room_image.size[0], room_image.size[1]), Image.LANCZOS)

	return result


	with gr.Blocks() as demo:
	gr.Markdown("# Tile Visualizer API")

	with gr.Tab("Segmentation"):
	with gr.Row():
	seg_input = gr.Image(type="pil", label="Room photo")
	with gr.Row():
	mask_output = gr.Image(type="pil", label="Floor mask")
	depth_output = gr.Image(type="pil", label="Depth map")
	seg_btn = gr.Button("Segment")
	seg_btn.click(fn=predict, inputs=seg_input, outputs=[mask_output, depth_output])

	with gr.Tab("AI Render"):
	with gr.Row():
	render_room = gr.Image(type="pil", label="Room photo")
	render_tile = gr.Image(type="pil", label="Tile texture")
	render_output = gr.Image(type="pil", label="Result")
	render_btn = gr.Button("Render")
	render_btn.click(fn=render_ai, inputs=[render_room, render_tile], outputs=render_output)

	app = demo.app

	from starlette.middleware.cors import CORSMiddleware
	app.add_middleware(CORSMiddleware, allow_origins=[""], allow_methods=[""], allow_headers=["*"])

	if __name__ == "__main__":
	demo.launch(ssr_mode=False)