Real-time drawing with an LCM in a local environment on an M2 Mac
It runs on the Apple Silicon GPU (MPS).
A video showing it in action is below.
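Before running the script, it is worth confirming that PyTorch can actually see the MPS backend. The snippet below is a minimal check added here for illustration (it is not part of the original script), assuming PyTorch 1.12 or later built with MPS support.

import torch

# Verify that the MPS (Metal Performance Shaders) backend is built into this
# PyTorch install and that a compatible GPU is available at runtime.
print("MPS built:", torch.backends.mps.is_built())
print("MPS available:", torch.backends.mps.is_available())

# Fall back to CPU if MPS is unavailable (purely illustrative).
device = "mps" if torch.backends.mps.is_available() else "cpu"
print("Using device:", device)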
from diffusers import DiffusionPipeline
import torch
import torchvision.transforms as T
from PIL import Image

# Load the model and run it on MPS (the Apple Silicon GPU) in float32
pipe = DiffusionPipeline.from_pretrained(
    "SimianLuo/LCM_Dreamshaper_v7",
    revision="fb9c5d167af11fd84454ae6493878b10bb63b067",
    safety_checker=None,
    custom_pipeline="latent_consistency_img2img",
)
pipe.to(torch_device="mps", torch_dtype=torch.float32)  # keep weights in float32
pipe.set_progress_bar_config(disable=True)
import gradio as gr
# Transform that converts a PIL.Image into a tensor
transform = T.Compose([
    T.Resize((224, 224)),                # adjust the size as needed
    T.ToTensor(),                        # PIL.Image -> tensor
    T.ConvertImageDtype(torch.float32),  # cast the tensor to float32
])
def generate(prompt, input_image):
    input_tensor = transform(input_image)  # convert the PIL.Image to a tensor
    image = pipe(
        prompt,
        image=input_tensor,
        num_inference_steps=8,
        guidance_scale=8.0,
        lcm_origin_steps=50,
        strength=0.8,
    ).images[0]
    if not isinstance(image, Image.Image):
        image = T.ToPILImage()(image)  # convert the tensor back to a PIL.Image
    return image.resize((768, 768))
# Gradio interface
with gr.Blocks(title="Realtime Latent Consistency Model") as demo:
    with gr.Row():
        with gr.Column(scale=23):
            textbox = gr.Textbox(show_label=False, value="a close-up picture of a fluffy cat")
    with gr.Row(variant="default"):
        input_image = gr.Image(
            show_label=False,
            type="pil",
            tool="color-sketch",
            source="canvas",
            height=742,
            width=742,
            brush_radius=10.0,
        )
        output_image = gr.Image(
            show_label=False,
            type="pil",
            interactive=False,
            height=742,
            width=742,
            elem_id="output_image",
        )
    # Re-generate whenever the prompt or the sketch changes
    textbox.change(fn=generate, inputs=[textbox, input_image], outputs=[output_image], show_progress=False)
    input_image.change(fn=generate, inputs=[textbox, input_image], outputs=[output_image], show_progress=False)

demo.launch(inline=False, share=True)