Bootstrap

Diffusers代码学习-多个ControlNet组合

可以根据不同的图像输入组合多个ControlNet条件来创建MultiControlNet。为了获得更好的结果,比较有利的做法是:

1. 有选择地进行遮罩,使各个条件互不重叠(例如,将canny图像中姿势条件所在的区域遮罩掉)

2. 使用controlnet_conditioning_scale参数进行实验,以确定分配给每个条件输入的权重

下面将结合Canny 边缘检测图像和人体姿态估计图像来生成新图像。

 

# Runtime setup for the script.

import os

# Point Hugging Face downloads at the hf-mirror.com mirror. The variable is
# assigned before the diffusers import below -- presumably so the hub client
# sees the endpoint when it is first loaded; keep this ordering (TODO confirm).
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
from diffusers.utils import load_image, make_image_grid
from PIL import Image
import numpy as np
import cv2

# Load the source image and derive the Canny edge control image from it.
original_image = load_image(
    "https://hf-mirror.com/datasets/huggingface/documentation-images/resolve/main/diffusers/landscape.png"
)

low_threshold = 100
high_threshold = 200
image = cv2.Canny(np.array(original_image), low_threshold, high_threshold)

# Zero out the middle half of the columns (from 1/4 to 3/4 of the width),
# i.e. the region that would overlap with the pose condition.
width = image.shape[1]
zero_start = width // 4
zero_end = zero_start + width // 2
image[:, zero_start:zero_end] = 0

# Replicate the single-channel edge map into three identical channels.
channel = image[:, :, None]
image = np.concatenate([channel] * 3, axis=2)
canny_image = Image.fromarray(image)

# NOTE: the returned grid is not assigned to anything here; presumably this
# line was meant for notebook-style display.
make_image_grid([original_image, canny_image], rows=1, cols=2)

# Bring in the human pose detection model and build the pose control image.

from controlnet_aux import OpenposeDetector

person_url = (
    "https://hf-mirror.com/datasets/huggingface/documentation-images/resolve/main/diffusers/person.png"
)

pose_detector = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")

# original_image is rebound here: from this point on it refers to the person
# photo rather than the landscape used for the Canny image.
original_image = load_image(person_url)
openpose_image = pose_detector(original_image)

# NOTE: the returned grid is not assigned to anything here; presumably this
# line was meant for notebook-style display.
make_image_grid([original_image, openpose_image], rows=1, cols=2)

from diffusers import (
    AutoencoderKL,
    ControlNetModel,
    StableDiffusionXLControlNetPipeline,
    UniPCMultistepScheduler,
)
import torch

# Assemble the ControlNets that are combined in a single pipeline.
# The list order is pose first, Canny second; the generation call passes its
# control images and conditioning scales in that same order.
openpose_controlnet = ControlNetModel.from_pretrained(
    "thibaud/controlnet-openpose-sdxl-1.0",
    torch_dtype=torch.float16,
)
canny_controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=torch.float16,
    use_safetensors=True,
)
controlnets = [openpose_controlnet, canny_controlnet]

# Load the VAE separately and hand it to the SDXL ControlNet pipeline.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
    use_safetensors=True,
)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnets,
    vae=vae,
    torch_dtype=torch.float16,
    use_safetensors=True,
)

# Swap in the UniPC scheduler, built from the pipeline's current scheduler config.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

pipe.enable_model_cpu_offload()

# Generate images from the prompts and the two control images.
prompt = "a giant standing in a fantasy landscape, best quality"
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"

# Fixed seed so repeated runs start from the same random state.
generator = torch.manual_seed(1)

# Control images, in the same order as the ControlNets were loaded
# (openpose, then canny), both resized to 1024x1024.
control_size = (1024, 1024)
control_images = [
    openpose_image.resize(control_size),
    canny_image.resize(control_size),
]

# controlnet_conditioning_scale gives one weight per control image, in order.
result = pipe(
    prompt,
    image=control_images,
    num_inference_steps=25,
    generator=generator,
    negative_prompt=negative_prompt,
    num_images_per_prompt=3,
    controlnet_conditioning_scale=[1.0, 0.8],
)
images = result.images

# 2x3 grid: the inputs on the first row, three generated images (shrunk to
# 512x512) on the second. original_image is the person photo at this point.
thumbnails = [images[i].resize((512, 512)) for i in range(3)]
output = make_image_grid(
    [original_image, canny_image, openpose_image, *thumbnails],
    rows=2,
    cols=3,
)

output.show()

以下是用以形成Canny边缘检测图片的背景图片

图片

以下是人物姿态图片

图片

以下为应用多个ControlNet组合形成的图片。这里挑选3张拼接

图片

;