Trajectory Consistency Distillation-LoRA
Trajectory Consistency Distillation (TCD) enables a model to generate higher quality and more detailed images with fewer steps. Moreover, thanks to effective error mitigation during the distillation process, TCD also demonstrates superior performance even with a large number of inference steps.

The major advantages of TCD are:

- Better than the teacher model: TCD demonstrates superior generative quality at both small and large inference steps, and exceeds the performance of DPM-Solver++(2S) with Stable Diffusion XL (SDXL). No additional discriminator or LPIPS supervision is included during TCD training.
- Flexible inference steps: The number of inference steps for TCD sampling can be freely adjusted without adversely affecting image quality.
- Freely change the level of detail: During inference, the image's level of detail can be adjusted with a single hyperparameter, *gamma*.

TCD-LoRA is trained with LoRA for large models like SDXL to reduce memory usage. This is also useful because you can reuse LoRAs between different fine-tuned models, as long as they share the same base model, without further training.

This guide will show you how to perform inference with TCD-LoRA for a variety of tasks like text-to-image and inpainting, as well as how to easily combine TCD-LoRA with other adapters. Choose one of the supported base models and its corresponding TCD-LoRA checkpoint from the table below to get started.
| Base model | TCD-LoRA checkpoint |
|---|---|
| stable-diffusion-v1-5 | TCD-SD15 |
| stable-diffusion-2-1-base | TCD-SD21-base |
| stable-diffusion-xl-base-1.0 | TCD-SDXL |
Make sure you have PEFT installed for better LoRA support.
pip install -U peft
General tasks
In this guide, let's use the StableDiffusionXLPipeline and the TCDScheduler. Use the load_lora_weights() method to load the SDXL-compatible TCD-LoRA weights.
A few tips to keep in mind for TCD-LoRA inference are:

- Keep num_inference_steps between 4 and 50.
- Set eta (used to control stochasticity at each step; it corresponds to the gamma parameter in the TCD paper) between 0 and 1. You should use a higher eta when increasing the number of inference steps, but the downside is that a larger eta in the TCDScheduler leads to blurrier images. A value of 0.3 is recommended to produce good results.
import torch
from diffusers import StableDiffusionXLPipeline, TCDScheduler

device = "cuda"
base_model_id = "stabilityai/stable-diffusion-xl-base-1.0"
tcd_lora_id = "h1t/TCD-SDXL-LoRA"

pipe = StableDiffusionXLPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16, variant="fp16").to(device)
pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)

pipe.load_lora_weights(tcd_lora_id)
pipe.fuse_lora()

prompt = "Painting of the orange cat Otto von Garfield, Count of Bismarck-Schönhausen, Duke of Lauenburg, Minister-President of Prussia. Depicted wearing a Prussian Pickelhaube and eating his favorite meal - lasagna."

image = pipe(
    prompt=prompt,
    num_inference_steps=4,
    guidance_scale=0,
    eta=0.3,
    generator=torch.Generator(device=device).manual_seed(0),
).images[0]
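Because eta trades stochasticity against sharpness, a quick way to get a feel for it is to render the same prompt at a few different values. This is a minimal sketch reusing the pipe, prompt, and seed from the example above; the eta values and output filenames are illustrative.

# Sweep eta to compare detail levels; larger eta adds stochasticity but blurs the output.
for eta in [0.0, 0.3, 0.6]:
    image = pipe(
        prompt=prompt,
        num_inference_steps=8,
        guidance_scale=0,
        eta=eta,
        generator=torch.Generator(device=device).manual_seed(0),
    ).images[0]
    image.save(f"tcd_eta_{eta}.png")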
Community models
TCD-LoRA also works with many community fine-tuned models and plugins. For example, load the animagine-xl-3.0 checkpoint, which is a community fine-tuned version of SDXL for generating anime images.
import torch
from diffusers import StableDiffusionXLPipeline, TCDScheduler

device = "cuda"
base_model_id = "cagliostrolab/animagine-xl-3.0"
tcd_lora_id = "h1t/TCD-SDXL-LoRA"

pipe = StableDiffusionXLPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16, variant="fp16").to(device)
pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)

pipe.load_lora_weights(tcd_lora_id)
pipe.fuse_lora()

prompt = "A man, clad in a meticulously tailored military uniform, stands with unwavering resolve. The uniform boasts intricate details, and his eyes gleam with determination. Strands of vibrant, windswept hair peek out from beneath the brim of his cap."

image = pipe(
    prompt=prompt,
    num_inference_steps=8,
    guidance_scale=0,
    eta=0.3,
    generator=torch.Generator(device=device).manual_seed(0),
).images[0]
TCD-LoRA also supports other LoRAs trained on different styles. For example, let's load the TheLastBen/Papercut_SDXL LoRA and merge it with the TCD-LoRA using the set_adapters() method.

Check out the Merge LoRAs guide to learn more about efficient merging methods.
import torch
from diffusers import StableDiffusionXLPipeline, TCDScheduler

device = "cuda"
base_model_id = "stabilityai/stable-diffusion-xl-base-1.0"
tcd_lora_id = "h1t/TCD-SDXL-LoRA"
styled_lora_id = "TheLastBen/Papercut_SDXL"

pipe = StableDiffusionXLPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16, variant="fp16").to(device)
pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)

# Load both LoRAs under distinct adapter names so they can be combined.
pipe.load_lora_weights(tcd_lora_id, adapter_name="tcd")
pipe.load_lora_weights(styled_lora_id, adapter_name="style")
pipe.set_adapters(["tcd", "style"], adapter_weights=[1.0, 1.0])

prompt = "papercut of a winter mountain, snow"

image = pipe(
    prompt=prompt,
    num_inference_steps=4,
    guidance_scale=0,
    eta=0.3,
    generator=torch.Generator(device=device).manual_seed(0),
).images[0]
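Note that, unlike the earlier examples, this one does not call fuse_lora(): keeping the adapters unfused lets set_adapters() rebalance them at any time. For instance, to tone down the papercut style relative to TCD, lower its weight. This is a minimal sketch; the 0.8 weight is an illustrative value, not a recommendation from the TCD authors.

# Re-weight the adapters; TCD stays at full strength while the style LoRA is reduced.
pipe.set_adapters(["tcd", "style"], adapter_weights=[1.0, 0.8])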
Adapters
TCD-LoRA is very versatile, and it can be combined with other adapter types like ControlNet, IP-Adapter, and AnimateDiff.
Depth ControlNet
import torch
import numpy as np
from PIL import Image
from transformers import DPTImageProcessor, DPTForDepthEstimation
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, TCDScheduler
from diffusers.utils import load_image, make_image_grid

device = "cuda"
depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to(device)
feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")

def get_depth_map(image):
    image = feature_extractor(images=image, return_tensors="pt").pixel_values.to(device)
    with torch.no_grad(), torch.autocast(device):
        depth_map = depth_estimator(image).predicted_depth

    # Resize the predicted depth to the generation resolution.
    depth_map = torch.nn.functional.interpolate(
        depth_map.unsqueeze(1),
        size=(1024, 1024),
        mode="bicubic",
        align_corners=False,
    )
    # Normalize to [0, 1] and replicate to three channels for the ControlNet input.
    depth_min = torch.amin(depth_map, dim=[1, 2, 3], keepdim=True)
    depth_max = torch.amax(depth_map, dim=[1, 2, 3], keepdim=True)
    depth_map = (depth_map - depth_min) / (depth_max - depth_min)
    image = torch.cat([depth_map] * 3, dim=1)

    image = image.permute(0, 2, 3, 1).cpu().numpy()[0]
    image = Image.fromarray((image * 255.0).clip(0, 255).astype(np.uint8))
    return image

base_model_id = "stabilityai/stable-diffusion-xl-base-1.0"
controlnet_id = "diffusers/controlnet-depth-sdxl-1.0"
tcd_lora_id = "h1t/TCD-SDXL-LoRA"

controlnet = ControlNetModel.from_pretrained(
    controlnet_id,
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    base_model_id,
    controlnet=controlnet,
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe.enable_model_cpu_offload()

pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)

pipe.load_lora_weights(tcd_lora_id)
pipe.fuse_lora()

prompt = "stormtrooper lecture, photorealistic"

image = load_image("https://huggingface.co/lllyasviel/sd-controlnet-depth/resolve/main/images/stormtrooper.png")
depth_image = get_depth_map(image)

controlnet_conditioning_scale = 0.5  # recommended for good generalization

image = pipe(
    prompt,
    image=depth_image,
    num_inference_steps=4,
    guidance_scale=0,
    eta=0.3,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    generator=torch.Generator(device=device).manual_seed(0),
).images[0]

grid_image = make_image_grid([depth_image, image], rows=1, cols=2)
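The conditioning strength often needs per-image tuning. A small sweep over controlnet_conditioning_scale, reusing the pipe, prompt, and depth_image from above, makes it easy to pick the best trade-off between prompt adherence and depth fidelity; the scale values and filenames here are illustrative.

# Try a few conditioning scales; higher values follow the depth map more strictly.
for scale in [0.3, 0.5, 0.8]:
    image = pipe(
        prompt,
        image=depth_image,
        num_inference_steps=4,
        guidance_scale=0,
        eta=0.3,
        controlnet_conditioning_scale=scale,
        generator=torch.Generator(device=device).manual_seed(0),
    ).images[0]
    image.save(f"depth_scale_{scale}.png")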
Canny ControlNet
import torch
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, TCDScheduler
from diffusers.utils import load_image, make_image_grid

device = "cuda"
base_model_id = "stabilityai/stable-diffusion-xl-base-1.0"
controlnet_id = "diffusers/controlnet-canny-sdxl-1.0"
tcd_lora_id = "h1t/TCD-SDXL-LoRA"

controlnet = ControlNetModel.from_pretrained(
    controlnet_id,
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    base_model_id,
    controlnet=controlnet,
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe.enable_model_cpu_offload()

pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)

pipe.load_lora_weights(tcd_lora_id)
pipe.fuse_lora()

prompt = "ultrarealistic shot of a furry blue bird"

canny_image = load_image("https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png")

controlnet_conditioning_scale = 0.5  # recommended for good generalization

image = pipe(
    prompt,
    image=canny_image,
    num_inference_steps=4,
    guidance_scale=0,
    eta=0.3,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    generator=torch.Generator(device=device).manual_seed(0),
).images[0]

grid_image = make_image_grid([canny_image, image], rows=1, cols=2)