
# ControlNet Tile SDXL


## Image Deblur Example (Repaint Detail)

(example images)

## Image Variation Example (like Midjourney)

(example images)

## Image Super-resolution (like Real-ESRGAN)

Supports any aspect ratio and any upscale factor; the examples below are 3 × 3 upscales.

(example images)

## Code to Use Tile Blur

Code references:
- https://huggingface.co/TTPlanet/TTPLanet_SDXL_Controlnet_Tile_Realistic/blob/main/TTP_tile_preprocessor_v5.py
- https://github.com/lllyasviel/ControlNet-v1-1-nightly/blob/main/gradio_tile.py

```python
import random

import cv2
import numpy as np
import torch
from PIL import Image

from diffusers import AutoencoderKL, ControlNetModel, EulerAncestralDiscreteScheduler, StableDiffusionXLControlNetPipeline
from guided_filter import FastGuidedFilter  # this file is uploaded in this repo


def resize_image_control(control_image, resolution):
    HH, WW, _ = control_image.shape
    crop_h = random.randint(0, HH - resolution[1])
    crop_w = random.randint(0, WW - resolution[0])
    crop_image = control_image[crop_h:crop_h + resolution[1], crop_w:crop_w + resolution[0], :]
    return crop_image, crop_w, crop_h


def apply_gaussian_blur(image_np, ksize=5, sigmaX=1.0):
    if ksize % 2 == 0:
        ksize += 1  # ksize must be odd
    return cv2.GaussianBlur(image_np, (ksize, ksize), sigmaX=sigmaX)


def apply_guided_filter(image_np, radius, eps, scale):
    filter = FastGuidedFilter(image_np, radius, eps, scale)
    return filter.filter(image_np)


controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it in as much detail as possible"
negative_prompt = "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality"

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
)

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16,
)

# when testing with another base model, you need to change the vae as well
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

controlnet_img = cv2.imread("your original image path")
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio), int(height * ratio)

crop_w, crop_h = 0, 0
controlnet_img = cv2.resize(controlnet_img, (W, H))

# randomly sample the degradation parameters
blur_strength = random.sample([i / 10. for i in range(10, 201, 2)], k=1)[0]
radius = random.sample([i for i in range(1, 40, 2)], k=1)[0]
eps = random.sample([i / 1000. for i in range(1, 101, 2)], k=1)[0]
scale_factor = random.sample([i / 10. for i in range(10, 181, 5)], k=1)[0]

if random.random() > 0.5:
    controlnet_img = apply_gaussian_blur(controlnet_img, ksize=int(blur_strength), sigmaX=blur_strength / 2)

if random.random() > 0.5:
    # apply guided filter
    controlnet_img = apply_guided_filter(controlnet_img, radius, eps, scale_factor)
    # downscale and upscale again to lose fine detail
    controlnet_img = cv2.resize(controlnet_img, (int(W / scale_factor), int(H / scale_factor)), interpolation=cv2.INTER_AREA)
    controlnet_img = cv2.resize(controlnet_img, (W, H), interpolation=cv2.INTER_CUBIC)

controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image to 1024 * 1024 or a matching bucket resolution to get the best performance
images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=W,
    height=H,
    num_inference_steps=30,
).images

# png usually gives better image quality than jpg or webp, but the files are much bigger
images[0].save("your image save path")
```
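The `guided_filter` module above comes from this repo. If you prefer not to copy that file, a similar degradation can be approximated with the guided filter shipped in opencv-contrib. This is a rough sketch, assuming `opencv-contrib-python` is installed; it is an approximation, not the exact `FastGuidedFilter` implementation:

```python
import cv2
import numpy as np

def apply_guided_filter_cv2(image_np, radius, eps, scale):
    # normalize to [0, 1] so the small eps values sampled above stay meaningful (assumption)
    img = image_np.astype(np.float32) / 255.0
    h, w = img.shape[:2]
    # emulate the "fast" variant: filter at reduced resolution, then upsample
    small = cv2.resize(img, (max(1, int(w / scale)), max(1, int(h / scale))), interpolation=cv2.INTER_AREA)
    r = max(1, int(radius / scale))  # shrink the filter radius with the image
    filtered = cv2.ximgproc.guidedFilter(small, small, r, eps)
    filtered = cv2.resize(filtered, (w, h), interpolation=cv2.INTER_CUBIC)
    return (filtered * 255.0).clip(0, 255).astype(np.uint8)
```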

## Code to Use Tile Variation

Using a more detailed prompt when regenerating can help!
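For instance, spelling out subject, setting, lighting, and style gives the model much more to anchor on than a bare noun (these example strings are my own illustration):

```python
# a minimal prompt often drifts away from the input image
prompt = "a dog"

# a detailed prompt anchors the regeneration much better
prompt = (
    "a golden retriever sitting on a weathered wooden porch at sunset, "
    "warm side lighting, shallow depth of field, detailed fur, "
    "photorealistic, 35mm photograph"
)
```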

```python
import cv2
import numpy as np
import torch
from PIL import Image

from diffusers import AutoencoderKL, ControlNetModel, EulerAncestralDiscreteScheduler, StableDiffusionXLControlNetPipeline

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it in as much detail as possible"
negative_prompt = "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality"

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
)

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16,
)

# when testing with another base model, you need to change the vae as well
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

controlnet_img = cv2.imread("your original image path")
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio), int(height * ratio)

crop_w, crop_h = 0, 0
controlnet_img = cv2.resize(controlnet_img, (W, H))
controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image to 1024 * 1024 or a matching bucket resolution to get the best performance
images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=W,
    height=H,
    num_inference_steps=30,
).images

# png usually gives better image quality than jpg or webp, but the files are much bigger
images[0].save("your image save path")
```
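To mimic Midjourney's grid of candidate variations, you can run the same call several times with different seeds. A minimal sketch reusing `pipe`, `prompt`, `negative_prompt`, `controlnet_img`, `W`, and `H` from the code above (the 2 × 2 grid layout is my own illustration):

```python
import torch
from PIL import Image

# four different seeds -> four variations of the same control image
seeds = [0, 1, 2, 3]
variations = []
for seed in seeds:
    generator = torch.Generator("cuda").manual_seed(seed)
    image = pipe(
        prompt,
        negative_prompt=negative_prompt,
        image=controlnet_img,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        generator=generator,
        width=W,
        height=H,
        num_inference_steps=30,
    ).images[0]
    variations.append(image)

# paste the variations into a 2 x 2 contact sheet
grid = Image.new("RGB", (W * 2, H * 2))
for idx, img in enumerate(variations):
    grid.paste(img, ((idx % 2) * W, (idx // 2) * H))
grid.save("variations_grid.png")
```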

## Code to Use Tile Super-resolution

Performance may be unstable; the next version is being optimized!

```python
import random

import cv2
import numpy as np
import torch
from PIL import Image

from diffusers import AutoencoderKL, ControlNetModel, EulerAncestralDiscreteScheduler, StableDiffusionXLControlNetPipeline

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it in as much detail as possible"
negative_prompt = "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality"

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
)

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16,
)

# when testing with another base model, you need to change the vae as well
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)

controlnet_img = cv2.imread("your original image path")
height, width, _ = controlnet_img.shape
ratio = np.sqrt(1024. * 1024. / (width * height))
# round the working resolution down to a multiple of 48 so it splits evenly
# into 3 x 3 tiles and each tile dimension stays a multiple of 16
W, H = int(width * ratio) // 48 * 48, int(height * ratio) // 48 * 48
controlnet_img = cv2.resize(controlnet_img, (W, H))
controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image to 1024 * 1024 or a matching bucket resolution to get the best performance
target_width = W // 3
target_height = H // 3

images = []
for i in range(3):  # three rows
    for j in range(3):  # three columns
        left = j * target_width
        top = i * target_height
        right = left + target_width
        bottom = top + target_height

        # crop the tile at the computed bounds, then upscale it to the working resolution
        cropped_image = controlnet_img.crop((left, top, right, bottom))
        cropped_image = cropped_image.resize((W, H))
        images.append(cropped_image)

seed = random.randint(0, 2147483647)
generator = torch.Generator('cuda').manual_seed(seed)

result_images = []
for sub_img in images:
    new_width, new_height = W, H
    # for StableDiffusionXLControlNetPipeline, the `image` argument is the control image
    out = pipe(
        prompt=[prompt] * 1,
        negative_prompt=[negative_prompt] * 1,
        image=sub_img,
        generator=generator,
        width=new_width,
        height=new_height,
        num_inference_steps=30,
        # SDXL micro-conditioning: treat each tile as a crop from a larger image
        crops_coords_top_left=(W, H),
        target_size=(W, H),
        original_size=(W * 2, H * 2),
    )
    result_images.append(out.images[0])

# paste the upscaled tiles onto a new 3 x 3 image
new_im = Image.new('RGB', (new_width * 3, new_height * 3))
new_im.paste(result_images[0], (0, 0))
new_im.paste(result_images[1], (new_width, 0))
new_im.paste(result_images[2], (new_width * 2, 0))
new_im.paste(result_images[3], (0, new_height))
new_im.paste(result_images[4], (new_width, new_height))
new_im.paste(result_images[5], (new_width * 2, new_height))
new_im.paste(result_images[6], (0, new_height * 2))
new_im.paste(result_images[7], (new_width, new_height * 2))
new_im.paste(result_images[8], (new_width * 2, new_height * 2))

# png usually gives better image quality than jpg or webp, but the files are much bigger
new_im.save("your image save path")
```
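The snippet above hard-codes a 3 × 3 grid. Since the model supports any upscale factor, the tiling and stitching loop can be generalized; here is a minimal sketch of an n × n version, reusing `pipe` from above (the helper name `upscale_n_times` is my own, not part of this repo):

```python
import random
import torch
from PIL import Image

def upscale_n_times(pipe, prompt, negative_prompt, img, n=3):
    # work at a resolution that splits evenly into n x n tiles,
    # with each tile dimension a multiple of 16
    W = img.width // (16 * n) * (16 * n)
    H = img.height // (16 * n) * (16 * n)
    img = img.resize((W, H))
    tile_w, tile_h = W // n, H // n

    generator = torch.Generator("cuda").manual_seed(random.randint(0, 2147483647))
    new_im = Image.new("RGB", (W * n, H * n))

    for i in range(n):      # rows
        for j in range(n):  # columns
            box = (j * tile_w, i * tile_h, (j + 1) * tile_w, (i + 1) * tile_h)
            tile = img.crop(box).resize((W, H))
            out = pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                image=tile,
                generator=generator,
                width=W,
                height=H,
                num_inference_steps=30,
                crops_coords_top_left=(W, H),
                target_size=(W, H),
                original_size=(W * 2, H * 2),
            )
            # paste the upscaled tile back at its grid position
            new_im.paste(out.images[0], (j * W, i * H))
    return new_im

# usage: a 4 x 4 upscale of the PIL image loaded earlier
# upscaled = upscale_n_times(pipe, prompt, negative_prompt, controlnet_img, n=4)
```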