z_image_turbo / z_image_convert_original_to_comfy.py
comfyanonymous's picture
Upload z_image_convert_original_to_comfy.py
4f1e20c verified
raw
history blame
1.6 kB
import safetensors.torch
import torch
import sys
# Usage: python z_image_convert_original_to_comfy.py output.safetensors diffusion_model*.safetensors
cast_to = None
if "fp8_e4m3fn" in sys.argv[1]:
cast_to = torch.float8_e4m3fn
elif "fp16" in sys.argv[1]:
cast_to = torch.float16
elif "bf16" in sys.argv[1]:
cast_to = torch.bfloat16
replace_keys = {"all_final_layer.2-1.": "final_layer.",
"all_x_embedder.2-1.": "x_embedder.",
".attention.to_out.0.bias": ".attention.out.bias",
".attention.norm_k.weight": ".attention.k_norm.weight",
".attention.norm_q.weight": ".attention.q_norm.weight",
".attention.to_out.0.weight": ".attention.out.weight"
}
out_sd = {}
for f in sys.argv[2:]:
sd = safetensors.torch.load_file(f)
cc = None
for k in sd:
w = sd[k]
if cast_to is not None:
w = w.to(cast_to)
k_out = k
if k_out.endswith(".attention.to_out.0.bias"):
continue
if k_out.endswith(".attention.to_k.weight"):
cc = [w]
continue
if k_out.endswith(".attention.to_q.weight"):
cc = [w] + cc
continue
if k_out.endswith(".attention.to_v.weight"):
cc = cc + [w]
w = torch.cat(cc, dim=0)
k_out = k_out.replace(".attention.to_v.weight", ".attention.qkv.weight")
for r, rr in replace_keys.items():
k_out = k_out.replace(r, rr)
out_sd[k_out] = w
safetensors.torch.save_file(out_sd, sys.argv[1])