# DeepSeek-V3-slice-jp64/scripts/deepseek_slice.py
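# Slice script for DeepSeek-V3: keep only a subset of the routed MoE experts per layer
# (target_n_routed_experts of the original n_routed_experts), slice the router tensors to
# match, and write the reduced safetensors shards plus an updated
# model.safetensors.index.json and config.json into the output directory.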
from transformers import AutoModelForCausalLM, AutoTokenizer
from safetensors.torch import safe_open, save_file
import torch
import os
from pathlib import Path
import json
import re
model_dir_name = "DeepSeek-V3-bf16"
model_dir_path = Path(model_dir_name)
output_dir_name = "DeepSeek-V3-slice"
output_dir_path = Path(output_dir_name)
os.makedirs(output_dir_name, exist_ok=True)
try:
    tensor_map_json = json.load(open(model_dir_path / "model.safetensors.index.json"))
    weight_map = tensor_map_json["weight_map"]
except FileNotFoundError:
    print("Model index file not found")
    raise
tensor_files = list(set(weight_map.values()))
tensor_files.sort()
print(f"変換対象のファイル数: {len(tensor_files)}")
try:
    config_json = json.load(open(model_dir_path / "config.json"))
except FileNotFoundError:
    print("Model config file not found")
    raise
# experts
n_routed_experts = int(config_json["n_routed_experts"])
# layers
num_hidden_layers = int(config_json["num_hidden_layers"])
# active experts
num_experts_per_tok = int(config_json["num_experts_per_tok"])
# dense layers are replaced with MoE starting from this layer index
first_k_dense_replace = int(config_json["first_k_dense_replace"])
converted_tensors_size = 0
target_n_routed_experts = 64
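# Keep 64 of the 256 routed experts in each MoE layer (DeepSeek-V3 ships with
# n_routed_experts = 256; the kept experts are renumbered 0..63 below).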
def print_tensor_info(tensor, key, new_key=None):
    print(f"key: {key} to {new_key if new_key else key}, shape: {tensor.shape}, size: {tensor.numel() * tensor.element_size()} Byte")
def ensure_tensor_has_data(tensor):
    try:
        # Test whether the tensor data is actually accessible
        tensor[0]
        return tensor
    except Exception as e:
        print(f"Tensor needs to be rebuilt: {e}")
        # Explicitly rebuild the tensor
        return torch.tensor(tensor.cpu().numpy(), dtype=tensor.dtype)
with open("layer_topk_idx_distribution.json", "r") as f:
layer_topk_idx_distribution = json.load(f)
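# Assumed layout of layer_topk_idx_distribution.json (not produced by this script):
# a map from layer index (as a string) to the expert ids worth keeping, e.g.
#   {"3": {"experts": [7, 12, 201, ...]}, "4": {"experts": [...]}, ...}
# presumably ranked by how often each expert was routed to during a profiling run.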
for i, tensor_file_name in enumerate(tensor_files, 1):
    print(f"\nProcessing: {tensor_file_name} ({i}/{len(tensor_files)})")
    tensor_path = model_dir_path / tensor_file_name
    tensor_data = safe_open(tensor_path, framework="pt")
    converted_tensors = {}
    for key in tensor_data.keys():
        tensor = tensor_data.get_tensor(key)
        tensor = ensure_tensor_has_data(tensor)  # make sure the tensor data is materialized
        # Extract the layer index from "model.layers.<idx>." (some keys have none)
        layer_match = re.search(r'model\.layers\.(\d+)\.', key)
        layer_idx = int(layer_match.group(1)) if layer_match else -1
        # Keys without a layer index and the leading dense layers are saved unchanged
        if layer_idx < first_k_dense_replace:
            converted_tensors[key] = tensor.clone()
            converted_tensors_size += tensor.numel() * tensor.element_size()
            print_tensor_info(tensor, key, key)
            continue
        if layer_idx >= num_hidden_layers:
            del tensor_map_json["weight_map"][key]
            continue
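        # Note: the branch above drops tensors whose layer index is past num_hidden_layers
        # (most likely the extra multi-token-prediction layer stored in the original checkpoint).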
        # Get the expert indices to keep for this layer from layer_topk_idx_distribution
        if str(layer_idx) in layer_topk_idx_distribution:
            experts_list = layer_topk_idx_distribution[str(layer_idx)]["experts"][:target_n_routed_experts]
        else:
            step = n_routed_experts // target_n_routed_experts
            experts_list = list(range(0, n_routed_experts, step))[:target_n_routed_experts]
        experts_list.sort()
        experts_tensor = torch.tensor(experts_list, dtype=torch.long, device=tensor.device)
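        # With the stock config the fallback above keeps every 4th expert:
        # step = 256 // 64 = 4, so experts 0, 4, 8, ..., 252 are selected.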
        # experts
        if ".mlp.experts." in key:
            experts_idx = int(re.search(r'\.mlp\.experts\.(\d+)\.', key).group(1))
            if experts_idx in experts_list:
                new_key = key.replace(f".mlp.experts.{experts_idx}.", f".mlp.experts.{experts_list.index(experts_idx)}.")
                converted_tensors[new_key] = tensor.clone()
                converted_tensors_size += tensor.numel() * tensor.element_size()
                print_tensor_info(tensor, key, new_key)
                tensor_map_json["weight_map"][new_key] = tensor_file_name
            else:
                print(f"skip experts: {key}")
            continue
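        # Kept experts are renumbered densely: original expert id e becomes
        # experts_list.index(e), so the sliced checkpoint uses expert indices 0..63.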
        # shared-experts
        if ".mlp.shared_experts." in key:
            # Save shared experts unchanged
            converted_tensors[key] = tensor.clone()
            converted_tensors_size += tensor.numel() * tensor.element_size()
            print_tensor_info(tensor, key, key)
            continue
if ".mlp.gate.e_score_correction_bias" in key:
# Tensor [256]を [target_n_routed_experts]に変換
squeezed_tensor = tensor[experts_tensor].clone()
converted_tensors[key] = squeezed_tensor
converted_tensors_size += squeezed_tensor.numel() * squeezed_tensor.element_size()
print_tensor_info(squeezed_tensor, key, key)
continue
if ".mlp.gate.weight" in key:
# Tensor [256, 7168]を [target_n_routed_experts, 7168]に変換
squeezed_tensor = tensor[experts_tensor, :].clone()
converted_tensors[key] = squeezed_tensor
converted_tensors_size += squeezed_tensor.numel() * squeezed_tensor.element_size()
print_tensor_info(squeezed_tensor, key, key)
continue
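        # The two gate branches above slice the router along the expert dimension with
        # experts_tensor, so row j of the reduced gate matches renumbered expert j.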
        converted_tensors[key] = tensor.clone()
        converted_tensors_size += tensor.numel() * tensor.element_size()
        print_tensor_info(tensor, key, key)
    save_file(converted_tensors, output_dir_path / tensor_file_name, metadata={"format": "pt"})
print(f"\n変換完了!")
print(f"合計サイズ: {converted_tensors_size / (1024**3):.2f} GB")
# model.safetensors.index.json
old_keys = list(tensor_map_json["weight_map"].keys())
for key in old_keys:
if ".mlp.experts." in key:
experts_idx = int(re.search(r'\.mlp\.experts\.(\d+)\.', key).group(1))
if experts_idx >= target_n_routed_experts:
del tensor_map_json["weight_map"][key]
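# Because surviving experts were renumbered to 0..target_n_routed_experts-1, any remaining
# weight-map entry with a larger expert index refers to a dropped expert and is removed.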
tensor_map_json["metadata"]["total_size"] = converted_tensors_size
with open(output_dir_path / "model.safetensors.index.json", "w") as f:
    json.dump(tensor_map_json, f, indent=4)
# config.json
output_config_json = config_json.copy()
output_config_json["n_routed_experts"] = target_n_routed_experts
# output_config_json["num_hidden_layers"] = num_hidden_layers
output_config_json["num_experts_per_tok"] = 4
# output_config_json["first_k_dense_replace"] = first_k_dense_replace
# output_config_json["n_shared_experts"] = n_shared_experts
# output_config_json["topk_group"] = topk_group
# output_config_json["n_group"] = n_group
with open(output_dir_path / "config.json", "w") as f:
    json.dump(output_config_json, f, indent=4)
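# Minimal loading sketch (assumes the tokenizer files and DeepSeek's remote modeling code
# are copied into the output directory first; this script only writes the weight shards,
# the index, and config.json):
#   tokenizer = AutoTokenizer.from_pretrained(output_dir_name, trust_remote_code=True)
#   model = AutoModelForCausalLM.from_pretrained(
#       output_dir_name, trust_remote_code=True, torch_dtype=torch.bfloat16
#   )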