
# ControlNet Tile SDXL


## Image Deblur Example (Repaint Detail)

(example images)

## Image Variation Example (like Midjourney)

(example images)

## Image Super-resolution (like Real-ESRGAN)

Supports any aspect ratio and any upscale factor; the examples below are 3 × 3 upscales.

(example images)

## Code to Use Tile Blur

Code references:
- https://huggingface.co/TTPlanet/TTPLanet_SDXL_Controlnet_Tile_Realistic/blob/main/TTP_tile_preprocessor_v5.py
- https://github.com/lllyasviel/ControlNet-v1-1-nightly/blob/main/gradio_tile.py

```python
import random

import cv2
import numpy as np
import torch
from PIL import Image

from diffusers import AutoencoderKL, ControlNetModel, EulerAncestralDiscreteScheduler, StableDiffusionXLControlNetPipeline
from guided_filter import FastGuidedFilter  # this file is uploaded in this repo


def resize_image_control(control_image, resolution):
    HH, WW, _ = control_image.shape
    crop_h = random.randint(0, HH - resolution[1])
    crop_w = random.randint(0, WW - resolution[0])
    crop_image = control_image[crop_h:crop_h + resolution[1], crop_w:crop_w + resolution[0], :]
    return crop_image, crop_w, crop_h


def apply_gaussian_blur(image_np, ksize=5, sigmaX=1.0):
    if ksize % 2 == 0:
        ksize += 1  # ksize must be odd
    return cv2.GaussianBlur(image_np, (ksize, ksize), sigmaX=sigmaX)


def apply_guided_filter(image_np, radius, eps, scale):
    filter = FastGuidedFilter(image_np, radius, eps, scale)
    return filter.filter(image_np)


controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it in as much detail as possible"
negative_prompt = "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality"

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
)

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16,
)

# when testing with another base model, you need to change the vae as well
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

controlnet_img = cv2.imread("your original image path")
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio), int(height * ratio)

crop_w, crop_h = 0, 0
controlnet_img = cv2.resize(controlnet_img, (W, H))

# randomly sample the degradation parameters
blur_strength = random.sample([i / 10. for i in range(10, 201, 2)], k=1)[0]
radius = random.sample([i for i in range(1, 40, 2)], k=1)[0]
eps = random.sample([i / 1000. for i in range(1, 101, 2)], k=1)[0]
scale_factor = random.sample([i / 10. for i in range(10, 181, 5)], k=1)[0]

if random.random() > 0.5:
    controlnet_img = apply_gaussian_blur(controlnet_img, ksize=int(blur_strength), sigmaX=blur_strength / 2)

if random.random() > 0.5:
    # apply guided filter
    controlnet_img = apply_guided_filter(controlnet_img, radius, eps, scale_factor)
    # downscale and upscale again to lose fine detail
    controlnet_img = cv2.resize(controlnet_img, (int(W / scale_factor), int(H / scale_factor)), interpolation=cv2.INTER_AREA)
    controlnet_img = cv2.resize(controlnet_img, (W, H), interpolation=cv2.INTER_CUBIC)

controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image to 1024 * 1024 or a matching bucket resolution to get the best performance
images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=W,
    height=H,
    num_inference_steps=30,
).images

# png usually gives better image quality than jpg or webp, but the files are much bigger
images[0].save("your image save path")
```
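The `guided_filter` module above comes from this repo. If you prefer not to copy that file, a similar degradation can be approximated with the guided filter shipped in opencv-contrib. This is a rough sketch, assuming `opencv-contrib-python` is installed; it is an approximation, not the exact `FastGuidedFilter` implementation:

```python
import cv2
import numpy as np

def apply_guided_filter_cv2(image_np, radius, eps, scale):
    # normalize to [0, 1] so the small eps values sampled above stay meaningful (assumption)
    img = image_np.astype(np.float32) / 255.0
    h, w = img.shape[:2]
    # emulate the "fast" variant: filter at reduced resolution, then upsample
    small = cv2.resize(img, (max(1, int(w / scale)), max(1, int(h / scale))), interpolation=cv2.INTER_AREA)
    r = max(1, int(radius / scale))  # shrink the filter radius with the image
    filtered = cv2.ximgproc.guidedFilter(small, small, r, eps)
    filtered = cv2.resize(filtered, (w, h), interpolation=cv2.INTER_CUBIC)
    return (filtered * 255.0).clip(0, 255).astype(np.uint8)
```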

## Code to Use Tile Variation

Using a more detailed prompt when regenerating can help!
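For instance, spelling out subject, setting, lighting, and style gives the model much more to anchor on than a bare noun (these example strings are my own illustration):

```python
# a minimal prompt often drifts away from the input image
prompt = "a dog"

# a detailed prompt anchors the regeneration much better
prompt = (
    "a golden retriever sitting on a weathered wooden porch at sunset, "
    "warm side lighting, shallow depth of field, detailed fur, "
    "photorealistic, 35mm photograph"
)
```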

```python
import cv2
import numpy as np
import torch
from PIL import Image

from diffusers import AutoencoderKL, ControlNetModel, EulerAncestralDiscreteScheduler, StableDiffusionXLControlNetPipeline

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it in as much detail as possible"
negative_prompt = "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality"

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
)

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16,
)

# when testing with another base model, you need to change the vae as well
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

controlnet_img = cv2.imread("your original image path")
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio), int(height * ratio)

crop_w, crop_h = 0, 0
controlnet_img = cv2.resize(controlnet_img, (W, H))
controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image to 1024 * 1024 or a matching bucket resolution to get the best performance
images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=W,
    height=H,
    num_inference_steps=30,
).images

# png usually gives better image quality than jpg or webp, but the files are much bigger
images[0].save("your image save path")
```
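To mimic Midjourney's grid of candidate variations, you can run the same call several times with different seeds. A minimal sketch reusing `pipe`, `prompt`, `negative_prompt`, `controlnet_img`, `W`, and `H` from the code above (the 2 × 2 grid layout is my own illustration):

```python
import torch
from PIL import Image

# four different seeds -> four variations of the same control image
seeds = [0, 1, 2, 3]
variations = []
for seed in seeds:
    generator = torch.Generator("cuda").manual_seed(seed)
    image = pipe(
        prompt,
        negative_prompt=negative_prompt,
        image=controlnet_img,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        generator=generator,
        width=W,
        height=H,
        num_inference_steps=30,
    ).images[0]
    variations.append(image)

# paste the variations into a 2 x 2 contact sheet
grid = Image.new("RGB", (W * 2, H * 2))
for idx, img in enumerate(variations):
    grid.paste(img, ((idx % 2) * W, (idx // 2) * H))
grid.save("variations_grid.png")
```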

## Code to Use Tile Super-resolution

Performance may be unstable; the next version is being optimized!

```python
import random

import cv2
import numpy as np
import torch
from PIL import Image

from diffusers import AutoencoderKL, ControlNetModel, EulerAncestralDiscreteScheduler, StableDiffusionXLControlNetPipeline

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it in as much detail as possible"
negative_prompt = "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality"

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
)

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16,
)

# when testing with another base model, you need to change the vae as well
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

controlnet_img = cv2.imread("your original image path")
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
# round the working resolution down to a multiple of 48 so it splits evenly
# into 3 x 3 tiles and each tile dimension stays a multiple of 16
W, H = int(width * ratio) // 48 * 48, int(height * ratio) // 48 * 48
controlnet_img = cv2.resize(controlnet_img, (W, H))
controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image to 1024 * 1024 or a matching bucket resolution to get the best performance
target_width = W // 3
target_height = H // 3

images = []
for i in range(3):  # three rows
    for j in range(3):  # three columns
        left = j * target_width
        top = i * target_height
        right = left + target_width
        bottom = top + target_height

        # crop the tile at the computed bounds, then upscale it to the working resolution
        cropped_image = controlnet_img.crop((left, top, right, bottom))
        cropped_image = cropped_image.resize((W, H))
        images.append(cropped_image)

seed = random.randint(0, 2147483647)
generator = torch.Generator('cuda').manual_seed(seed)

result_images = []
for sub_img in images:
    new_width, new_height = W, H
    # for StableDiffusionXLControlNetPipeline, the `image` argument is the control image
    out = pipe(
        prompt=[prompt] * 1,
        negative_prompt=[negative_prompt] * 1,
        image=sub_img,
        generator=generator,
        width=new_width,
        height=new_height,
        num_inference_steps=30,
        # SDXL micro-conditioning: treat each tile as a crop from a larger image
        crops_coords_top_left=(W, H),
        target_size=(W, H),
        original_size=(W * 2, H * 2),
    )
    result_images.append(out.images[0])

# paste the upscaled tiles onto a new 3 x 3 image
new_im = Image.new('RGB', (new_width * 3, new_height * 3))
new_im.paste(result_images[0], (0, 0))
new_im.paste(result_images[1], (new_width, 0))
new_im.paste(result_images[2], (new_width * 2, 0))
new_im.paste(result_images[3], (0, new_height))
new_im.paste(result_images[4], (new_width, new_height))
new_im.paste(result_images[5], (new_width * 2, new_height))
new_im.paste(result_images[6], (0, new_height * 2))
new_im.paste(result_images[7], (new_width, new_height * 2))
new_im.paste(result_images[8], (new_width * 2, new_height * 2))

# png usually gives better image quality than jpg or webp, but the files are much bigger
new_im.save("your image save path")
```
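The snippet above hard-codes a 3 × 3 grid. Since the model supports any upscale factor, the tiling and stitching loop can be generalized; here is a minimal sketch of an n × n version, reusing `pipe` from above (the helper name `upscale_n_times` is my own, not part of this repo):

```python
import random
import torch
from PIL import Image

def upscale_n_times(pipe, prompt, negative_prompt, img, n=3):
    # work at a resolution that splits evenly into n x n tiles,
    # with each tile dimension a multiple of 16
    W = img.width // (16 * n) * (16 * n)
    H = img.height // (16 * n) * (16 * n)
    img = img.resize((W, H))
    tile_w, tile_h = W // n, H // n

    generator = torch.Generator("cuda").manual_seed(random.randint(0, 2147483647))
    new_im = Image.new("RGB", (W * n, H * n))

    for i in range(n):      # rows
        for j in range(n):  # columns
            box = (j * tile_w, i * tile_h, (j + 1) * tile_w, (i + 1) * tile_h)
            tile = img.crop(box).resize((W, H))
            out = pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                image=tile,
                generator=generator,
                width=W,
                height=H,
                num_inference_steps=30,
                crops_coords_top_left=(W, H),
                target_size=(W, H),
                original_size=(W * 2, H * 2),
            )
            # paste the upscaled tile back at its grid position
            new_im.paste(out.images[0], (j * W, i * H))
    return new_im

# usage: a 4 x 4 upscale of the PIL image loaded earlier
# upscaled = upscale_n_times(pipe, prompt, negative_prompt, controlnet_img, n=4)
```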