Update README.md
README.md (changed)
@@ -36,7 +36,7 @@ from peft import LoraConfig, get_peft_model, PeftModel
 login(token="replace with your own token", add_to_git_credential=True)
 
 weight_dtype = torch.bfloat16
-train_batch_size =
+train_batch_size = 4
 snr_gamma = 5  # SNR parameter, the weighting coefficient for the signal-to-noise-weighted loss
 # Set the random seed for reproducibility
 seed = 1126  # random seed
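The loss term that uses `snr_gamma` lies outside the changed hunks, so the snippet below is only a minimal sketch of how a min-SNR-weighted MSE loss is commonly implemented for a v-prediction target; `noise_scheduler`, `timesteps`, `model_pred` and `target` are the names used elsewhere in this README and are assumed here.

```python
import torch
import torch.nn.functional as F

def min_snr_weighted_loss(model_pred, target, timesteps, noise_scheduler, snr_gamma=5):
    # SNR of each sampled timestep, derived from the scheduler's cumulative alphas
    alphas_cumprod = noise_scheduler.alphas_cumprod.to(timesteps.device)[timesteps]
    snr = alphas_cumprod / (1.0 - alphas_cumprod)
    # Min-SNR weighting: clamp the per-timestep weight at snr_gamma
    weights = torch.minimum(snr, torch.full_like(snr, float(snr_gamma)))
    weights = weights / (snr + 1.0)  # the (snr + 1) denominator applies to v-prediction targets
    loss = F.mse_loss(model_pred.float(), target.float(), reduction="none")
    loss = loss.mean(dim=list(range(1, loss.ndim))) * weights
    return loss.mean()
```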
@@ -45,25 +45,23 @@ if torch.cuda.is_available():
     torch.cuda.manual_seed_all(seed)
 
 # Optimizer parameters
-unet_learning_rate = 1e-
+unet_learning_rate = 1e-6  # learning rate for the UNet, controls the step size of its parameter updates
 text_encoder_learning_rate = 1e-4  # learning rate for the text encoder, controls the step size of the text-embedding parameter updates
 
 # Learning-rate scheduler parameters
 lr_scheduler_name = "cosine_with_restarts"  # cosine annealing with restarts: the learning rate decays gradually and restarts periodically
 lr_warmup_steps = 100  # warmup steps: the learning rate ramps up to its maximum over the first 100 steps
-max_train_steps =
-num_cycles =
+max_train_steps = 500  # total number of training steps, i.e. how many iterations the training loop runs
+num_cycles = 1  # number of cycles for the cosine scheduler: how many times the learning rate decays and restarts during training
 
 pretrained_model_name_or_path = "stabilityai/stable-diffusion-2-1"
 
 # LoRA configuration
-
+unet_lora_config = LoraConfig(
     r=32,  # LoRA rank, i.e. the dimension of the low-rank matrices; determines the degrees of freedom of the adaptation
     lora_alpha=16,  # scaling factor that controls how strongly the LoRA weights affect the model
-
-
-        "to_k", "to_q", "to_v", "to_out.0"  # UNet modules the LoRA layers are applied to (the attention projections)
-    ],
+    init_lora_weights="gaussian",
+    target_modules=["to_k", "to_q", "to_v", "to_out.0"],  # UNet modules the LoRA layers are applied to (the attention projections)
     lora_dropout=0  # LoRA dropout probability; 0 means no dropout
 )
 
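For reference, the scheduler hyperparameters introduced above (`lr_scheduler_name`, `lr_warmup_steps`, `max_train_steps`, `num_cycles`) are typically passed to diffusers' `get_scheduler`; the call itself sits in an unchanged part of the README, so this is only a sketch assuming an `optimizer` has already been created by `prepare_optimizer` further down.

```python
from diffusers.optimization import get_scheduler

lr_scheduler = get_scheduler(
    lr_scheduler_name,                    # "cosine_with_restarts"
    optimizer=optimizer,                  # assumed: built from the trainable LoRA parameters
    num_warmup_steps=lr_warmup_steps,     # 100 warmup steps
    num_training_steps=max_train_steps,   # 500 steps in total
    num_cycles=num_cycles,                # 1 cosine cycle (decay + restart)
)
```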
@@ -110,13 +108,13 @@ train_dataloader = DataLoader(dataset, shuffle=True, collate_fn=collate_fn, batc
 
 from diffusers import SD3Transformer2DModel
 
-def prepare_lora_model(
+def prepare_lora_model(unet_lora_config, pretrained_model_name_or_path, model_path=None, resume=False, merge_lora=False):
     """
     (1) Goal:
        - Load the full Stable Diffusion model, including the LoRA layers, and merge the LoRA weights if requested. This covers the tokenizer, noise scheduler, UNet, VAE and text encoder.
 
     (2) Parameters:
-       -
+       - unet_lora_config: LoraConfig, the LoRA configuration object
        - pretrained_model_name_or_path: str, model name or path on Hugging Face
        - model_path: str, path to the pretrained model
        - resume: bool, whether to resume from the previous training run
@@ -130,7 +128,7 @@ def prepare_lora_model(lora_config, pretrained_model_name_or_path, model_path=No
        - text_encoder: CLIPTextModel
     """
     # Load the noise scheduler, which controls how noise is added to and removed from the diffusion process
-    noise_scheduler =
+    noise_scheduler = DDIMScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler")
 
     # Load the tokenizer, which converts text captions into tokens
     tokenizer = CLIPTokenizer.from_pretrained(
@@ -157,51 +155,56 @@ def prepare_lora_model(lora_config, pretrained_model_name_or_path, model_path=No
         torch_dtype=weight_dtype,
         subfolder="unet"
     )
+
+    # Freeze the VAE, text encoder and UNet parameters
+    vae.requires_grad_(False)
+    text_encoder.requires_grad_(False)
+    unet.requires_grad_(False)
 
     # If resuming, load the weights from the previous training run
     if resume:
         if model_path is None or not os.path.exists(model_path):
             raise ValueError("A valid model_path must be provided when resume is set to True")
         # Load the LoRA model with PEFT's from_pretrained method
-        text_encoder = PeftModel.from_pretrained(text_encoder, os.path.join(model_path, "text_encoder"))
+        # text_encoder = PeftModel.from_pretrained(text_encoder, os.path.join(model_path, "text_encoder"))
         unet = PeftModel.from_pretrained(unet, os.path.join(model_path, "unet"))
 
-        # Ensure
-
-
-
-        # Make sure requires_grad is True for the text encoder's trainable parameters
-        for param in text_encoder.parameters():
-            if param.requires_grad is False:
-                param.requires_grad = True
+        # Make sure the LoRA parameters are trainable; only the listed target modules are unfrozen
+        target_modules = ["to_k", "to_q", "to_v", "to_out.0"]
+
+        for name, param in unet.named_parameters():
+            # Only set requires_grad to True for the targeted modules
+            if any(target_module in name for target_module in target_modules):
+                param.requires_grad = True  # only the LoRA parameters are set to trainable
 
         print(f"✅ Restored model weights from {model_path}")
 
     else:
-
-        #
-        unet
+
+        # Apply the LoRA configuration to the UNet
+        unet.add_adapter(unet_lora_config)
 
     # Print the number of trainable parameters
-    # print("📊 Text Encoder trainable parameters:")
-    # text_encoder.print_trainable_parameters()
     print("📊 UNet trainable parameters:")
-
+    trainable_params = 0
+    for name, param in unet.named_parameters():
+        if param.requires_grad:
+            param_count = param.numel()  # number of elements in this parameter tensor
+            trainable_params += param_count
+            # print(f"trainable parameter: {name}, shape: {param.shape}, count: {param_count}")
+
+    print(f"Total number of trainable LoRA parameters: {trainable_params}")
 
     if merge_lora:
         # Merge the LoRA weights into the base model; only called at inference time
-        text_encoder = text_encoder.merge_and_unload()
+        # text_encoder = text_encoder.merge_and_unload()
         unet = unet.merge_and_unload()
 
         # Switch to evaluation mode
         text_encoder.eval()
         unet.eval()
 
-    # Freeze the VAE parameters
-    vae.requires_grad_(False)
-    text_encoder.requires_grad_(False)
-
     # Move the models to the GPU and set the weight dtype
     unet.to(device, dtype=weight_dtype)
     vae.to(device, dtype=weight_dtype)
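One hedged note on the resume branch above: matching parameter names against `to_k`/`to_q`/`to_v`/`to_out.0` also matches the frozen base projection weights (their names contain the same substrings), not only the injected LoRA matrices. If the intent is to unfreeze just the adapter, filtering on the PEFT naming convention (`lora_A`/`lora_B`) is a stricter alternative; this is a sketch, not part of the commit.

```python
# Unfreeze only the LoRA matrices injected by PEFT (their names contain "lora_"),
# leaving the base attention projection weights frozen.
for name, param in unet.named_parameters():
    param.requires_grad = "lora_" in name
```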
@@ -213,13 +216,9 @@ def prepare_optimizer(unet, text_encoder, unet_learning_rate=5e-4, text_encoder_
     # Select the LoRA parameters in the UNet that require training
     unet_lora_layers = [p for p in unet.parameters() if p.requires_grad]
 
-    # Select the LoRA parameters in the text encoder that require training
-    text_encoder_lora_layers = [p for p in text_encoder.parameters() if p.requires_grad]
-
     # Group the trainable parameters and assign each group its own learning rate
     trainable_params = [
         {"params": unet_lora_layers, "lr": unet_learning_rate},
-        {"params": text_encoder_lora_layers, "lr": text_encoder_learning_rate}
     ]
 
     # Use the AdamW optimizer
@@ -237,7 +236,7 @@ project_name = "fupo"
 dataset_name = "fupo"
 # Root and main directories
 root_dir = "./"  # current directory
-main_dir = os.path.join(root_dir, "SD-2-
+main_dir = os.path.join(root_dir, "SD-2-1")  # main directory
 # Project directory
 project_dir = os.path.join(main_dir, project_name)
 model_path = os.path.join(project_dir, "logs", "checkpoint-last")
@@ -248,7 +247,7 @@ model_path = os.path.join(project_dir, "logs", "checkpoint-last")
 
 # Prepare the models
 tokenizer, noise_scheduler, unet, vae, text_encoder = prepare_lora_model(
-
+    unet_lora_config,
     pretrained_model_name_or_path,
     model_path,
     resume=False,
@@ -278,14 +277,18 @@ import math
 from huggingface_hub import HfApi, Repository
 from tqdm.auto import tqdm
 import torch.nn.functional as F
+from peft.utils import get_peft_model_state_dict
+from diffusers.utils import convert_state_dict_to_diffusers
 
+accumulation_steps = 4  # number of gradient-accumulation steps
+max_norm = 0.5  # maximum gradient norm for clipping
 output_folder = os.path.join(project_dir, "logs")
 # Disable tokenizer parallelism to avoid warnings
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 # Initialization
 global_step = 0
-
+best_loss = float("inf")  # initialized to +inf; stores the best (lowest) loss seen so far
 
 # Progress bar to show training progress
 progress_bar = tqdm(
@@ -297,7 +300,6 @@ progress_bar = tqdm(
 for epoch in range(math.ceil(max_train_steps / len(train_dataloader))):
     # If you want to add evaluation during training, calling train() inside the loop is necessary
     unet.train()
-    text_encoder.train()
 
     for step, batch in enumerate(train_dataloader):
         if global_step >= max_train_steps:
@@ -313,7 +315,7 @@ for epoch in range(math.ceil(max_train_steps / len(train_dataloader))):
         noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)
 
         # Get the text embeddings
-        encoder_hidden_states = text_encoder(batch["input_ids"].to(device))[0]
+        encoder_hidden_states = text_encoder(batch["input_ids"].to(device), return_dict=False)[0]
         assert encoder_hidden_states is not None, "Encoder hidden states should not be None"
 
         # Compute the target values
@@ -323,7 +325,8 @@ for epoch in range(math.ceil(max_train_steps / len(train_dataloader))):
             target = noise_scheduler.get_velocity(latents, noise, timesteps)  # predict the velocity vector
 
         # UNet prediction
-
+        with torch.autograd.detect_anomaly():
+            model_pred = unet(noisy_latents, timesteps, encoder_hidden_states, return_dict=False)[0]
         assert model_pred is not None, "Model prediction should not be None"
 
         # Compute the loss
@@ -345,34 +348,41 @@ for epoch in range(math.ceil(max_train_steps / len(train_dataloader))):
 
         # Backpropagation
         loss.backward()
-
-
-
+        torch.nn.utils.clip_grad_norm_(unet.parameters(), max_norm)
+        # Gradient accumulation
+        if (global_step + 1) % accumulation_steps == 0:
+            optimizer.step()
+            lr_scheduler.step()
+            optimizer.zero_grad()
         progress_bar.update(1)
         global_step += 1
 
-
-
-
-
-
-
-
-
-
-
-
-
+        if global_step % 100 == 0:
+            # Save the model with the lowest loss seen so far
+            if loss.item() < best_loss:
+                best_loss = loss.item()
+                save_path = os.path.join(output_folder, "best_checkpoint")
+                os.makedirs(save_path, exist_ok=True)
+
+                # Save the LoRA weights of the PeftModel
+                unet_lora_state_dict = convert_state_dict_to_diffusers(get_peft_model_state_dict(unet))
+                StableDiffusionPipeline.save_lora_weights(
+                    save_directory=save_path,
+                    unet_lora_layers=unet_lora_state_dict,
+                    safe_serialization=True,
+                )
+                # text_encoder.save_pretrained(os.path.join(save_path, "text_encoder"))
+                print(f"💾 Saved the lowest-loss model to {save_path}, current loss: {best_loss}")
 
 # Save the final model to checkpoint-last
 save_path = os.path.join(output_folder, "checkpoint-last")
 os.makedirs(save_path, exist_ok=True)
-
-
+unet_lora_state_dict = convert_state_dict_to_diffusers(get_peft_model_state_dict(unet))
+StableDiffusionPipeline.save_lora_weights(
+    save_directory=save_path,
+    unet_lora_layers=unet_lora_state_dict,
+    safe_serialization=True,
+)
 print(f"💾 Saved the final model to {save_path}")
 
-print("🎉 Fine-tuning finished!")
-
-# Upload to the Hugging Face Hub
-
 ```
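Two hedged observations on the new training-loop code: `torch.autograd.detect_anomaly()` adds significant overhead and is normally enabled only while debugging, and with gradient accumulation the loss is usually divided by `accumulation_steps` before `backward()` (with clipping done just before the optimizer step) so the accumulated gradient approximates the average over the effective batch. A minimal sketch using the names from the diff:

```python
# Sketch: scale the loss so gradients accumulated over `accumulation_steps`
# micro-batches average rather than sum, and clip right before stepping.
(loss / accumulation_steps).backward()
if (global_step + 1) % accumulation_steps == 0:
    torch.nn.utils.clip_grad_norm_(unet.parameters(), max_norm)
    optimizer.step()
    lr_scheduler.step()
    optimizer.zero_grad()
```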
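The checkpoints written with `StableDiffusionPipeline.save_lora_weights` can be loaded back for inference with `load_lora_weights`; a minimal sketch, assuming the `checkpoint-last` directory produced above (the prompt is just an illustrative example):

```python
import torch
from diffusers import StableDiffusionPipeline

# Load the base model and attach the fine-tuned LoRA weights
pipe = StableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-1", torch_dtype=torch.bfloat16
).to("cuda")
pipe.load_lora_weights("./SD-2-1/fupo/logs/checkpoint-last")  # path used by this README

image = pipe("a fupo-style photo", num_inference_steps=30).images[0]
image.save("sample.png")
```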