【ヘルプ】diffusers（SDXL）でcontrol-lora（control-lora-canny-rank256.safetensors）を使いたい【助けて】

2024年2月29日 18:53

わははははタイトル通りです。なにもわからん！！！！！！！！！！
何をしてもうまくいかなかったので備忘録です。ググったら参考になりそうなコードがいくつか出てきたので、ぺたり。

https://github.com/Mikubill/sd-webui-controlnet/blob/main/scripts/controlnet_lora.py

https://github.com/HighCWu/control-lora-v2/blob/master/models/control_lora.py

適当に書いてみたけどダメだったコード

test_control-lora_sdxl.py
https://github.com/Mikubill/sd-webui-controlnet/blob/main/scripts/controlnet_lora.py をインポートして使っています。


import glob
import os
import sys

import cv2
import torch
import torch.nn as nn
from diffusers import StableDiffusionXLPipeline, UniPCMultistepScheduler
from diffusers.utils import load_image
from PIL import Image, ImageEnhance, ImageFilter, ImageOps
from safetensors.torch import load_file

from controlnet_lora import LinearWithLoRA, Conv2dWithLoRA, bind_control_lora


# Path handed to StableDiffusionXLPipeline.from_pretrained() in main().
base_model_path = "D:/Stable-diffusion/diffusers/cagliostrolab/animagine-xl-3.0"
# Presumably a Canny ControlNet for SDXL; not referenced anywhere else in this script.
Canny_path = "D:/Stable-diffusion/controlnet-canny-sdxl-1.0"
# control-lora .safetensors file that main() passes to load_control_model().
controlLora_path = "D:/Stable-diffusion/controlnet/control-lora-canny-rank256.safetensors"

def resize_image(img):
    """Scale an image so its longer side is 1200 px, if it is at least 1024 px.

    Args:
        img: Image array whose first two ``shape`` entries are (height, width).

    Returns:
        The array produced by ``cv2.resize`` (aspect ratio kept, the shorter
        side truncated to an int), or ``img`` itself, unchanged, when the
        longer side is below 1024 px.
    """
    long_side = 1200
    rows, cols = img.shape[:2]

    # Small images are passed through untouched.
    if max(rows, cols) < 1024:
        return img

    if rows > cols:
        # Portrait: height becomes 1200 and the width follows the aspect ratio.
        return cv2.resize(img, (int(long_side * cols / rows), long_side))

    # Landscape or square: width becomes 1200 and the height follows.
    return cv2.resize(img, (long_side, int(long_side * rows / cols)))


class ControlModel(torch.nn.Module):
    """Container module holding one LoRA wrapper layer per up/down weight pair.

    NOTE(review): the surrounding article presents this script as an attempt
    that did not work, so treat the layer construction below as unverified.
    """

    def __init__(self, lora_weights):
        """Build the LoRA layers from a name -> tensor mapping.

        Args:
            lora_weights: Mapping of weight names to tensors; only names
                containing '.up.weight' that have a matching '.down.weight'
                entry are used.
        """
        super(ControlModel, self).__init__()
        # Wrap the generated layers in a ModuleList so they are registered as submodules.
        self.lora_layers = nn.ModuleList(self._initialize_lora_layers(lora_weights))

    def _initialize_lora_layers(self, lora_weights):
        """Create a LinearWithLoRA / Conv2dWithLoRA for each up/down weight pair.

        Returns:
            A list of the created layers, in the iteration order of
            ``lora_weights``. Pairs whose base name contains neither 'linear'
            nor 'conv2d' produce no layer.
        """
        layers = []
        # Walk the weight dictionary and build one layer per '.up.weight' entry.
        for name, weight in lora_weights.items():
            # Only the 'up' half of a pair triggers layer creation.
            if '.up.weight' in name:
                base_name = name.replace('.up.weight', '')
                # Look for the matching 'down' weight of the same layer.
                down_weight_name = f'{base_name}.down.weight'
                if down_weight_name in lora_weights:
                    # Pick the layer type (Linear or Conv2d) from the weight name.
                    if 'linear' in base_name:
                        # NOTE(review): in_features comes from the *up* tensor's second
                        # dimension and out_features from the *down* tensor's first
                        # dimension; for a usual LoRA pair both would be the rank
                        # rather than the wrapped layer's sizes — confirm.
                        in_features = weight.shape[1]
                        out_features = lora_weights[down_weight_name].shape[0]
                        linear_lora = LinearWithLoRA(in_features, out_features, bias=False)
                        linear_lora.up = nn.Parameter(weight)
                        linear_lora.down = nn.Parameter(lora_weights[down_weight_name])
                        layers.append(linear_lora)
                    elif 'conv2d' in base_name:
                        # Conv2dWithLoRA is built with placeholder (dummy) sizes that
                        # are not derived from the loaded tensors.
                        in_channels = 3
                        out_channels = 64
                        kernel_size = (3, 3)
                        conv2d_lora = Conv2dWithLoRA(in_channels, out_channels, kernel_size, bias=False)
                        conv2d_lora.up = nn.Parameter(weight)
                        conv2d_lora.down = nn.Parameter(lora_weights[down_weight_name])
                        layers.append(conv2d_lora)
        return layers



def load_control_model(model_path):
    """Load control-lora weights from a safetensors file into a ControlModel.

    Args:
        model_path: Path to the ``.safetensors`` file.

    Returns:
        A ``ControlModel`` built from the loaded tensors after they have been
        moved to the ``cuda`` device.
    """
    lora_weights = load_file(model_path)
    # load_file() already yields tensors, so move them with .to() rather than
    # re-wrapping them in torch.tensor(), which warns about copy-constructing
    # a tensor from a tensor.
    lora_weights = {name: tensor.to('cuda') for name, tensor in lora_weights.items()}
    return ControlModel(lora_weights)




def main(canny_img, prompt):
    """Generate one image with the SDXL pipeline from a prompt and a Canny image.

    NOTE(review): the pipeline and the control-lora are loaded from disk on
    every call, and this function is called once per input image.

    Args:
        canny_img: Edge image forwarded to the pipeline as ``image``.
        prompt: Text appended to the fixed positive-prompt prefix.

    Returns:
        The first image of the pipeline output.
    """
    pipe = StableDiffusionXLPipeline.from_pretrained(base_model_path)
    unet = pipe.unet
    # Attach the control-lora to the pipeline's UNet.
    control_lora = load_control_model(controlLora_path)
    bind_control_lora(unet, control_lora)

    # Prepend the fixed quality tags to the caller's prompt.
    prompt = "SimplepositiveXLv, masterpiece, best quality, " + prompt
    negative_prompt = "negativeXL_D, sdxl-negprompt8-v1, SimplepositiveXLv1, nsfw, lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry, artist name"

    # Swap the scheduler, then enable the memory-saving options.
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    pipe.enable_xformers_memory_efficient_attention()
    pipe.enable_model_cpu_offload()

    # Fixed seed so repeated runs start from the same generator state.
    generator = torch.manual_seed(1)
    # NOTE(review): StableDiffusionXLPipeline is the plain text-to-image pipeline;
    # confirm that it actually consumes `image=` — a ControlNet pipeline may be
    # required for the Canny image to have any effect.
    image = pipe(
        prompt=prompt, negative_prompt=negative_prompt, num_inference_steps=20, generator=generator, image=canny_img
    ).images[0]
    return image

if __name__ == "__main__":
    input_dir = "E:/AI/lineart/Line"
    output_dir = "E:/AI/lineart/greyscale"
    os.makedirs(output_dir, exist_ok=True)

    # Process every PNG in input_dir together with its same-named .txt prompt file.
    for img_path in glob.glob(os.path.join(input_dir, '*.png')):
        txt_path = os.path.splitext(img_path)[0] + '.txt'
        if not os.path.exists(txt_path):
            # Without this guard `prompt` is undefined for the first image
            # (NameError) and silently reuses the previous image's prompt later.
            print(f"Skipping {img_path}: prompt file {txt_path} not found")
            continue
        with open(txt_path, 'r') as txt_file:
            prompt = txt_file.read()

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot be read.
            print(f"Skipping {img_path}: cv2.imread could not read the image")
            continue

        img = resize_image(img)
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Canny edge detection; the edge map is what gets passed to main().
        canny_img = cv2.Canny(gray_img, 100, 200)
        canny_img_pil = Image.fromarray(canny_img)
        output_img = main(canny_img_pil, prompt)

        # Save under the same file name as the input image.
        file_name = os.path.basename(img_path)
        output_img.save(os.path.join(output_dir, file_name))

しょうがないから代用に書いたコード

stable-diffusion-webuiのAPIを使用。
追記：Forgeでも動きました

import requests
import json
import base64
from datetime import datetime
import os
import itertools
import random
import re
from PIL import Image, PngImagePlugin, ImageEnhance, ImageFilter, ImageOps
import io
import glob
import cv2


def resize_image(img):
    """Resize an image so that both sides are multiples of 8.

    Images whose longer side is at least 1024 px are scaled so that side
    becomes 1200 px; the other side follows the aspect ratio and is rounded
    down to a multiple of 8. Smaller images only have each side rounded down
    to a multiple of 8.

    Args:
        img: Image array whose first two ``shape`` entries are (height, width).

    Returns:
        The array returned by ``cv2.resize``.
    """
    long_side = 1200
    rows, cols = img.shape[:2]

    if max(rows, cols) < 1024:
        # Keep the size, only snapping each side down to a multiple of 8.
        target = (cols - cols % 8, rows - rows % 8)
    elif rows > cols:
        scaled_cols = int(long_side * cols / rows)
        target = (scaled_cols // 8 * 8, long_side)
    else:
        scaled_rows = int(long_side * rows / cols)
        target = (long_side, scaled_rows // 8 * 8)

    # cv2.resize takes the target size as (width, height).
    return cv2.resize(img, target)


def build_payload(prompt, nega, w, h, encoded_canny):
    """Assemble the txt2img request body with one ControlNet unit.

    Args:
        prompt: Positive prompt text.
        nega: Negative prompt text.
        w: Output width.
        h: Output height.
        encoded_canny: Control image, stored as the unit's ``input_image``.

    Returns:
        A dict with the fixed sampling settings and the ControlNet unit under
        ``alwayson_scripts`` -> ``controlnet`` -> ``args``.
    """
    controlnet_unit = {
        "input_image": encoded_canny,
        "module": "none",
        "model": "control-lora-canny-rank256 [ec2dbbe4]",
        "weight": 1.2,
    }
    payload = {
        "denoising_strength": 1.0,
        "prompt": prompt,
        "negative_prompt": nega,
        "seed": -1,
        "sampler_name": "Euler a",
        "steps": 20,
        "cfg_scale": 7,
        "width": w,
        "height": h,
    }
    # Added last so the key order of the serialized payload stays the same.
    payload["alwayson_scripts"] = {"controlnet": {"args": [controlnet_unit]}}
    return payload

def send_post_request(url, payload):
    """POST ``payload`` to ``url`` as a JSON-encoded body.

    Args:
        url: Endpoint to post to.
        payload: JSON-serializable object to send.

    Returns:
        The response object returned by ``requests.post``.
    """
    body = json.dumps(payload)
    return requests.post(url, data=body, headers={"Content-Type": "application/json"})


def save_image(data, url, file_name):
    """Save the first generated image, embedding its generation info.

    Args:
        data: Response dict whose ``"images"`` list holds Base64 image strings.
        url: Base URL of the server; its ``/sdapi/v1/png-info`` endpoint is queried.
        file_name: Destination path for the image.

    Returns:
        ``file_name``, unchanged.
    """
    encoded_png = data["images"][0]
    raw_png = base64.b64decode(encoded_png)

    # Ask the server for the info text belonging to this image.
    info_response = requests.post(
        url=f'{url}/sdapi/v1/png-info',
        json={"image": "data:image/png;base64," + encoded_png},
    )
    parameters = info_response.json().get("info")

    picture = Image.open(io.BytesIO(raw_png))
    metadata = PngImagePlugin.PngInfo()
    # Only write the "parameters" text chunk when the server returned something.
    if parameters:
        metadata.add_text("parameters", parameters)

    picture.save(file_name, pnginfo=metadata)
    return file_name



def create_and_save_images(input_url, prompt, canny_img_pil, file_name):
    """Request one txt2img generation guided by the given image and save it.

    Args:
        input_url: Base URL of the server.
        prompt: Text appended to the fixed positive-prompt prefix.
        canny_img_pil: PIL image sent as the control image; its size is also
            used as the output width and height.
        file_name: Destination path for the generated image.
    """
    width, height = canny_img_pil.size

    # Serialize the PIL image to PNG bytes in memory, then Base64-encode them.
    buffer = io.BytesIO()
    canny_img_pil.save(buffer, format='PNG')
    encoded_canny = base64.b64encode(buffer.getvalue()).decode('utf-8')

    full_prompt = " SimplepositiveXLv1 <lora:shadow01:1>, " + prompt
    negative = "negativeXL_D sdxl-negprompt8-v1m unaestheticXL_AYv1"

    payload = build_payload(full_prompt, negative, width, height, encoded_canny)
    result = send_post_request(f"{input_url}/sdapi/v1/txt2img", payload).json()

    # Nothing to save when the response carries no images.
    if "images" not in result or not result["images"]:
        print("Failed to generate image. 'images' key not found in the response.")
        return

    saved_path = save_image(result, input_url, file_name)
    print(f"Downloaded {saved_path} to local")


if __name__ == '__main__':
    input_dir = "E:/AI/lineart/Line"
    output_dir = "E:/AI/lineart/greyscale"
    os.makedirs(output_dir, exist_ok=True)

    url = "http://127.0.0.1:7860"
    # To list the checkpoints known to the server:
    # sd_models = requests.get(f"{url}/sdapi/v1/sd-models").json()
    # sd_model_names = [i["title"] for i in sd_models]
    # print(sd_model_names)

    model = "animagine-xl-3.0.safetensors [1449e5b0b9]"

    option_payload = {
        "sd_model_checkpoint": model,
    }
    # Ask the server to switch to this checkpoint.
    response = requests.post(url=f'{url}/sdapi/v1/options', json=option_payload)
    print(f"Generating images for model: {model}")

    # Process every PNG in input_dir together with its same-named .txt prompt file.
    for img_path in glob.glob(os.path.join(input_dir, '*.png')):
        txt_path = os.path.splitext(img_path)[0] + '.txt'
        if not os.path.exists(txt_path):
            # Without this guard `prompt` is undefined for the first image
            # (NameError) and silently reuses the previous image's prompt later.
            print(f"Skipping {img_path}: prompt file {txt_path} not found")
            continue
        with open(txt_path, 'r') as txt_file:
            prompt = txt_file.read()

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot be read.
            print(f"Skipping {img_path}: cv2.imread could not read the image")
            continue

        img = resize_image(img)
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Canny edge detection; the edge map is sent as the control image.
        canny_img = cv2.Canny(gray_img, 100, 200)
        canny_img_pil = Image.fromarray(canny_img)
        file_name = os.path.join(output_dir, os.path.basename(img_path))
        create_and_save_images(url, prompt, canny_img_pil, file_name)

Forge用コード

import requests
import json
import base64
from datetime import datetime
import os
import itertools
import random
import re
from PIL import Image, PngImagePlugin, ImageEnhance, ImageFilter, ImageOps
import io
import glob
import cv2


def resize_image(img):
    """Resize an image so that both sides are multiples of 8.

    Images whose longer side is at least 1024 px are scaled so that side
    becomes 1200 px; the other side follows the aspect ratio and is rounded
    down to a multiple of 8. Smaller images only have each side rounded down
    to a multiple of 8.

    Args:
        img: Image array whose first two ``shape`` entries are (height, width).

    Returns:
        The array returned by ``cv2.resize``.
    """
    long_side = 1200
    rows, cols = img.shape[:2]

    if max(rows, cols) < 1024:
        # Keep the size, only snapping each side down to a multiple of 8.
        target = (cols - cols % 8, rows - rows % 8)
    elif rows > cols:
        scaled_cols = int(long_side * cols / rows)
        target = (scaled_cols // 8 * 8, long_side)
    else:
        scaled_rows = int(long_side * rows / cols)
        target = (long_side, scaled_rows // 8 * 8)

    # cv2.resize takes the target size as (width, height).
    return cv2.resize(img, target)


def build_payload(prompt, nega, w, h, unit1):
    """Assemble the txt2img request body around a prepared ControlNet unit.

    Args:
        prompt: Positive prompt text.
        nega: Negative prompt text.
        w: Output width.
        h: Output height.
        unit1: ControlNet unit, placed as the only entry of
            ``alwayson_scripts`` -> ``ControlNet`` -> ``args``.

    Returns:
        A dict with the fixed sampling settings and the ControlNet unit.
    """
    payload = {
        "denoising_strength": 1.0,
        "prompt": prompt,
        "negative_prompt": nega,
        "seed": -1,
        "sampler_name": "Euler a",
        "steps": 20,
        "cfg_scale": 7,
        "width": w,
        "height": h,
    }
    # Added last so the key order of the serialized payload stays the same.
    payload["alwayson_scripts"] = {"ControlNet": {"args": [unit1]}}
    return payload

def send_post_request(url, payload):
    """POST ``payload`` to ``url`` as a JSON-encoded body.

    Args:
        url: Endpoint to post to.
        payload: JSON-serializable object to send.

    Returns:
        The response object returned by ``requests.post``.
    """
    body = json.dumps(payload)
    return requests.post(url, data=body, headers={"Content-Type": "application/json"})


def save_image(data, url, file_name):
    """Save the first generated image, embedding its generation info.

    Args:
        data: Response dict whose ``"images"`` list holds Base64 image strings.
        url: Base URL of the server; its ``/sdapi/v1/png-info`` endpoint is queried.
        file_name: Destination path for the image.

    Returns:
        ``file_name``, unchanged.
    """
    encoded_png = data["images"][0]
    raw_png = base64.b64decode(encoded_png)

    # Ask the server for the info text belonging to this image.
    info_response = requests.post(
        url=f'{url}/sdapi/v1/png-info',
        json={"image": "data:image/png;base64," + encoded_png},
    )
    parameters = info_response.json().get("info")

    picture = Image.open(io.BytesIO(raw_png))
    metadata = PngImagePlugin.PngInfo()
    # Only write the "parameters" text chunk when the server returned something.
    if parameters:
        metadata.add_text("parameters", parameters)

    picture.save(file_name, pnginfo=metadata)
    return file_name



def create_and_save_images(input_url, prompt, canny_img_pil, file_name):
    """Request one txt2img generation with a ControlNet unit and save the result.

    Args:
        input_url: Base URL of the server.
        prompt: Text appended to the fixed positive-prompt prefix.
        canny_img_pil: PIL image sent as the unit's ``image``; its size is also
            used as the output width and height.
        file_name: Destination path for the generated image.
    """
    width, height = canny_img_pil.size

    # Serialize the PIL image to PNG bytes in memory, then Base64-encode them.
    buffer = io.BytesIO()
    canny_img_pil.save(buffer, format='PNG')
    encoded_canny = base64.b64encode(buffer.getvalue()).decode('utf-8')

    full_prompt = "masterpiece, best quality, SimplepositiveXLv1 <lora:shadow01:1.2>, " + prompt
    negative = "negativeXL_D sdxl-negprompt8-v1m unaestheticXL_AYv1, nsfw, lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry, artist name"

    # Single ControlNet unit describing how the control image is applied.
    controlnet_unit = {
        "image": encoded_canny,
        "mask_image": None,
        "control_mode": "Balanced",
        "enabled": True,
        "guidance_end": 1,
        "guidance_start": 0,
        "pixel_perfect": True,
        "processor_res": 1200,
        "resize_mode": "Just Resize",  # "Just Resize", "Crop and Resize", "Resize and Fill"
        "threshold_a": 64,
        "threshold_b": 64,
        "weight": 1,
        "module": "canny",
        "model": "control-lora-canny-rank256 [ec2dbbe4]",
        "save_detected_map": None,
        "hr_option": "Both"
    }

    payload = build_payload(full_prompt, negative, width, height, controlnet_unit)
    result = send_post_request(f"{input_url}/sdapi/v1/txt2img", payload).json()

    # Nothing to save when the response carries no images.
    if "images" not in result or not result["images"]:
        print("Failed to generate image. 'images' key not found in the response.")
        return

    saved_path = save_image(result, input_url, file_name)
    print(f"Downloaded {saved_path} to local")


if __name__ == '__main__':
    input_dir = "E:/AI/lineart/Line"
    output_dir = "E:/AI/lineart/greyscale"
    os.makedirs(output_dir, exist_ok=True)

    url = "http://127.0.0.1:7860"
    model = "animagine-xl-3.0.safetensors [1449e5b0b9]"

    option_payload = {
        "sd_model_checkpoint": model,
    }

    # Ask the server to switch to this checkpoint.
    response = requests.post(url=f'{url}/sdapi/v1/options', json=option_payload)
    print(f"Generating images for model: {model}")

    # Process every PNG in input_dir that has a same-named .txt prompt file.
    for img_path in glob.glob(os.path.join(input_dir, '*.png')):
        txt_path = os.path.splitext(img_path)[0] + '.txt'
        if not os.path.exists(txt_path):
            # Images without a prompt file are not processed.
            print(f"Skipping {img_path}: prompt file {txt_path} not found")
            continue

        # Skip images whose output already exists, before doing any other work.
        output_path = os.path.join(output_dir, os.path.basename(img_path))
        if os.path.exists(output_path):
            print(f"Skipping {output_path}: File already exists")
            continue

        with open(txt_path, 'r') as txt_file:
            prompt = txt_file.read()

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot be
            # read; without this check one bad file aborts the whole batch.
            print(f"Skipping {img_path}: cv2.imread could not read the image")
            continue

        img = resize_image(img)
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        canny_img = cv2.Canny(gray_img, 100, 200)
        canny_img_pil = Image.fromarray(canny_img)
        create_and_save_images(url, prompt, canny_img_pil, output_path)

この記事が気に入ったらサポートをしてみませんか？