|
import json
import os
import re
from pathlib import Path

import torch
from safetensors.torch import safe_open, save_file
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
# Input: the bf16 DeepSeek-V3 checkpoint to slice
model_dir_name = "DeepSeek-V3-bf16"
model_dir_path = Path(model_dir_name)

# Output: directory for the expert-sliced checkpoint
output_dir_name = "DeepSeek-V3-slice"
output_dir_path = Path(output_dir_name)
os.makedirs(output_dir_name, exist_ok=True)
|
|
|
# Load the safetensors index, which maps each tensor name to its shard file.
try:
    with open(model_dir_path / "model.safetensors.index.json") as f:
        tensor_map_json = json.load(f)
    weight_map = tensor_map_json["weight_map"]
except FileNotFoundError:
    print("Model index file not found")
    raise

# Enumerate the shard files to convert, in a deterministic order.
tensor_files = sorted(set(weight_map.values()))
print(f"Number of files to convert: {len(tensor_files)}")
|
|
|
# Load the model config and pull out the MoE-related hyperparameters.
try:
    with open(model_dir_path / "config.json") as f:
        config_json = json.load(f)
except FileNotFoundError:
    print("Model config file not found")
    raise

n_routed_experts = int(config_json["n_routed_experts"])            # routed experts per MoE layer
num_hidden_layers = int(config_json["num_hidden_layers"])          # transformer layers to keep
num_experts_per_tok = int(config_json["num_experts_per_tok"])      # top-k experts per token
first_k_dense_replace = int(config_json["first_k_dense_replace"])  # leading dense (non-MoE) layers
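
# For reference only (the actual values always come from config.json above): in the
# published DeepSeek-V3 config these are typically n_routed_experts=256,
# num_experts_per_tok=8, num_hidden_layers=61, first_k_dense_replace=3.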
|
|
|
# Running total of bytes written to the sliced checkpoint.
converted_tensors_size = 0

# Number of routed experts to keep per MoE layer after slicing.
target_n_routed_experts = 64
|
|
|
|
|
def print_tensor_info(tensor, key, new_key=None):
    """Log the source key, destination key, shape and byte size of a tensor."""
    print(f"key: {key} to {new_key if new_key else key}, "
          f"shape: {tensor.shape}, size: {tensor.numel() * tensor.element_size()} bytes")


def ensure_tensor_has_data(tensor):
    """Return the tensor if its storage is readable, otherwise rebuild it."""
    try:
        tensor[0]  # touch the data to make sure the storage is materialized
        return tensor
    except Exception as e:
        print(f"Tensor needs to be rebuilt: {e}")
        # .numpy() cannot represent bfloat16, so rebuild via a contiguous clone instead.
        return tensor.contiguous().clone()
|
|
|
with open("layer_topk_idx_distribution.json", "r") as f: |
|
layer_topk_idx_distribution = json.load(f) |
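
# Assumed format of layer_topk_idx_distribution.json (produced by a separate profiling
# step, not shown here): one entry per MoE layer index, with "experts" listing expert IDs
# ordered by how often they were selected, e.g.
#   {"3": {"experts": [17, 203, 5, ...]}, "4": {"experts": [...]}, ...}
# Only the first target_n_routed_experts entries of each list are used below.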
|
|
|
|
|
for i, tensor_file_name in enumerate(tensor_files, 1):
    print(f"\nProcessing: {tensor_file_name} ({i}/{len(tensor_files)})")

    tensor_path = model_dir_path / tensor_file_name
    tensor_data = safe_open(tensor_path, framework="pt")
    converted_tensors = {}

    for key in tensor_data.keys():
        tensor = tensor_data.get_tensor(key)
        tensor = ensure_tensor_has_data(tensor)

        # Extract the layer index from the key; non-layer tensors
        # (embeddings, final norm, lm_head, ...) get -1.
        layer_match = re.search(r"model\.layers\.(\d+)\.", key)
        layer_idx = int(layer_match.group(1)) if layer_match else -1

        # Dense layers (and non-layer tensors) are copied unchanged.
        if layer_idx < first_k_dense_replace:
            converted_tensors[key] = tensor.clone()
            converted_tensors_size += tensor.numel() * tensor.element_size()
            print_tensor_info(tensor, key, key)
            continue

        # Layers beyond num_hidden_layers are dropped, along with their index entries.
        if layer_idx >= num_hidden_layers:
            del tensor_map_json["weight_map"][key]
            continue

        # Decide which routed experts to keep for this layer: prefer the measured
        # top-k usage distribution, otherwise fall back to an even stride.
        if str(layer_idx) in layer_topk_idx_distribution:
            experts_list = layer_topk_idx_distribution[str(layer_idx)]["experts"][:target_n_routed_experts]
        else:
            step = n_routed_experts // target_n_routed_experts
            experts_list = list(range(0, n_routed_experts, step))[:target_n_routed_experts]
        experts_list.sort()
        experts_tensor = torch.tensor(experts_list, dtype=torch.long, device=tensor.device)

        # Routed experts: keep only the selected ones and renumber them 0..target-1.
        if ".mlp.experts." in key:
            experts_idx = int(re.search(r"\.mlp\.experts\.(\d+)\.", key).group(1))
            if experts_idx in experts_list:
                new_key = key.replace(f".mlp.experts.{experts_idx}.",
                                      f".mlp.experts.{experts_list.index(experts_idx)}.")
                converted_tensors[new_key] = tensor.clone()
                converted_tensors_size += tensor.numel() * tensor.element_size()
                print_tensor_info(tensor, key, new_key)
                tensor_map_json["weight_map"][new_key] = tensor_file_name
            else:
                print(f"skip experts: {key}")
            continue

        # Shared experts are always kept as-is.
        if ".mlp.shared_experts." in key:
            converted_tensors[key] = tensor.clone()
            converted_tensors_size += tensor.numel() * tensor.element_size()
            print_tensor_info(tensor, key, key)
            continue

        # Gate bias: keep only the entries of the selected experts.
        if ".mlp.gate.e_score_correction_bias" in key:
            squeezed_tensor = tensor[experts_tensor].clone()
            converted_tensors[key] = squeezed_tensor
            converted_tensors_size += squeezed_tensor.numel() * squeezed_tensor.element_size()
            print_tensor_info(squeezed_tensor, key, key)
            continue

        # Gate weight: keep only the rows of the selected experts.
        if ".mlp.gate.weight" in key:
            squeezed_tensor = tensor[experts_tensor, :].clone()
            converted_tensors[key] = squeezed_tensor
            converted_tensors_size += squeezed_tensor.numel() * squeezed_tensor.element_size()
            print_tensor_info(squeezed_tensor, key, key)
            continue

        # Everything else (attention, norms, ...) is copied unchanged.
        converted_tensors[key] = tensor.clone()
        converted_tensors_size += tensor.numel() * tensor.element_size()
        print_tensor_info(tensor, key, key)

    save_file(converted_tensors, output_dir_path / tensor_file_name, metadata={"format": "pt"})
|
|
|
print(f"\n変換完了!") |
|
print(f"合計サイズ: {converted_tensors_size / (1024**3):.2f} GB") |
|
|
|
|
|
|
|
# Remove index entries for expert indices that no longer exist after slicing.
# (Entries 0..target_n_routed_experts-1 were overwritten with the new shard names above.)
old_keys = list(tensor_map_json["weight_map"].keys())
for key in old_keys:
    if ".mlp.experts." in key:
        experts_idx = int(re.search(r"\.mlp\.experts\.(\d+)\.", key).group(1))
        if experts_idx >= target_n_routed_experts:
            del tensor_map_json["weight_map"][key]
|
|
|
|
|
tensor_map_json["metadata"]["total_size"] = converted_tensors_size |
|
with open(output_dir_path / "model.safetensors.index.json", "w") as f: |
|
json.dump(tensor_map_json, f, indent=4) |
|
|
|
|
|
# Write a config that matches the sliced checkpoint.
output_config_json = config_json.copy()
output_config_json["n_routed_experts"] = target_n_routed_experts

# With fewer routed experts, also reduce the number of experts activated per token.
output_config_json["num_experts_per_tok"] = 4

with open(output_dir_path / "config.json", "w") as f:
    json.dump(output_config_json, f, indent=4)
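
# --- Optional: loading the sliced checkpoint (a minimal sketch, not run by this script) ---
# This assumes the remaining repository files (tokenizer files, configuration_*.py,
# modeling_*.py, etc.) have been copied into DeepSeek-V3-slice by hand; only then can the
# sliced model be loaded with trust_remote_code=True. Uncomment to try it:
#
# tokenizer = AutoTokenizer.from_pretrained(output_dir_name, trust_remote_code=True)
# model = AutoModelForCausalLM.from_pretrained(
#     output_dir_name,
#     torch_dtype=torch.bfloat16,
#     device_map="auto",
#     trust_remote_code=True,
# )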
|
|