可以根据不同的图像输入组合多个ControlNet条件来创建MultiControlNet。为了获得更好的结果,比较有利的做法是:
1. 有选择地对条件图像进行遮罩,使各个条件互不重叠(例如,把canny图像中姿势条件所在的区域遮罩掉)
2. 使用controlnet_conditioning_scale参数进行实验,以确定分配给每个条件输入的权重
下面将结合Canny 边缘检测图像和人体姿态估计图像来生成新图像。
# Set-up required before the rest of the script runs.
import os
# Sets HF_ENDPOINT to the hf-mirror.com mirror.
# NOTE(review): presumably this must stay above the `diffusers` import so the
# setting is already in the environment when that library loads — confirm.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
from diffusers.utils import load_image, make_image_grid
from PIL import Image
import numpy as np
import cv2
# Load the landscape image and derive the Canny-edge control image from it.
original_image = load_image(
    "https://hf-mirror.com/datasets/huggingface/documentation-images/resolve/main/diffusers/landscape.png"
)
image = np.array(original_image)

# Lower / upper thresholds handed to cv2.Canny.
low_threshold = 100
high_threshold = 200
image = cv2.Canny(image, low_threshold, high_threshold)

# Zero out the middle columns (from 1/4 to 3/4 of the width) so the edge map
# does not overlap the area where the pose condition will be placed.
zero_start = image.shape[1] // 4
zero_end = zero_start + image.shape[1] // 2
image[:, zero_start:zero_end] = 0

# Add a trailing channel axis and repeat the edge map three times along it,
# then wrap the array as a PIL image.
image = image[:, :, None]
image = np.concatenate([image, image, image], axis=2)
canny_image = Image.fromarray(image)

# Side-by-side preview of the source image and its edge map. The returned grid
# is not bound to a name, so it is only visible in an interactive/notebook
# session that echoes the last expression.
make_image_grid([original_image, canny_image], rows=1, cols=2)
# Bring in the human-pose detector and build the pose control image.
from controlnet_aux import OpenposeDetector

openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")

# NOTE: this rebinds `original_image`; from here on it refers to the person
# image, no longer to the landscape image loaded earlier.
original_image = load_image(
    "https://hf-mirror.com/datasets/huggingface/documentation-images/resolve/main/diffusers/person.png"
)
openpose_image = openpose(original_image)

# Side-by-side preview of the person image and the detected pose. The returned
# grid is not bound to a name, so it is only visible in an interactive/notebook
# session that echoes the last expression.
make_image_grid([original_image, openpose_image], rows=1, cols=2)

# Names needed by the pipeline construction and generation code that follows.
from diffusers import (
    StableDiffusionXLControlNetPipeline,
    ControlNetModel,
    AutoencoderKL,
    UniPCMultistepScheduler,
)
import torch
# Assemble the combination of ControlNets and the SDXL pipeline that uses them.
# The list order is pose first, then canny; the conditioning images and
# per-condition scales passed to the pipeline later follow the same order.
openpose_controlnet = ControlNetModel.from_pretrained(
    "thibaud/controlnet-openpose-sdxl-1.0",
    torch_dtype=torch.float16,
)
canny_controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=torch.float16,
    use_safetensors=True,
)
controlnets = [openpose_controlnet, canny_controlnet]

# VAE loaded separately and handed to the pipeline in place of its default one.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
    use_safetensors=True,
)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnets,
    vae=vae,
    torch_dtype=torch.float16,
    use_safetensors=True,
)

# Replace the pipeline's scheduler with a UniPC multistep scheduler built from
# the existing scheduler's configuration.
scheduler_config = pipe.scheduler.config
pipe.scheduler = UniPCMultistepScheduler.from_config(scheduler_config)

pipe.enable_model_cpu_offload()
# Generate images from the prompt and the two control images.
prompt = "a giant standing in a fantasy landscape, best quality"
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"

# Fixed seed so repeated runs start from the same random state.
generator = torch.manual_seed(1)

# Conditioning images, pose first and canny second, both resized to 1024x1024.
# Kept under its own name so it is not confused with the generated `images`.
control_images = [
    openpose_image.resize((1024, 1024)),
    canny_image.resize((1024, 1024)),
]

images = pipe(
    prompt,
    image=control_images,
    num_inference_steps=25,
    generator=generator,
    negative_prompt=negative_prompt,
    num_images_per_prompt=3,
    # One weight per conditioning image, in the same order as `control_images`.
    controlnet_conditioning_scale=[1.0, 0.8],
).images

# 2x3 grid: top row is the source image, the canny map and the pose map;
# bottom row is the three generated images shrunk to 512x512.
thumbnails = [generated.resize((512, 512)) for generated in images[:3]]
output = make_image_grid(
    [original_image, canny_image, openpose_image, *thumbnails],
    rows=2,
    cols=3,
)
output.show()
以下是用以形成Canny边缘检测图片的背景图片
以下是人物姿态图片
以下为应用多个ControlNet组合生成的图片。这里将生成的3张图片与原图、Canny边缘图和姿态图拼接在一起展示。