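"""Slice the DeepSeek-V3-bf16 checkpoint down to a smaller MoE model.

For every MoE layer, only target_n_routed_experts routed experts are kept
(selected from the per-layer usage statistics in layer_topk_idx_distribution.json,
or evenly spaced as a fallback), the kept experts are renumbered from 0, and the
router gate weight and e_score_correction_bias are sliced to match. A rewritten
model.safetensors.index.json and config.json are written to the output directory.
"""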
from safetensors.torch import safe_open, save_file
import torch
import os
from pathlib import Path
import json
import re

model_dir_name = "DeepSeek-V3-bf16"
model_dir_path = Path(model_dir_name)

output_dir_name = "DeepSeek-V3-slice"
output_dir_path = Path(output_dir_name)
os.makedirs(output_dir_name, exist_ok=True)

try:
    with open(model_dir_path / "model.safetensors.index.json") as f:
        tensor_map_json = json.load(f)
    weight_map = tensor_map_json["weight_map"]
except FileNotFoundError:
    print("Model index file not found")
    raise

tensor_files = list(set(weight_map.values()))
tensor_files.sort()
print(f"Number of files to convert: {len(tensor_files)}")

try:
    with open(model_dir_path / "config.json") as f:
        config_json = json.load(f)
except FileNotFoundError:
    print("Model config file not found")
    raise

# Number of routed experts per MoE layer in the original model
n_routed_experts = int(config_json["n_routed_experts"])

# Number of transformer layers
num_hidden_layers = int(config_json["num_hidden_layers"])

# Number of experts activated per token
num_experts_per_tok = int(config_json["num_experts_per_tok"])

# Layers with index >= this value use MoE instead of a dense MLP
first_k_dense_replace = int(config_json["first_k_dense_replace"])

# Running total of bytes written to the sliced checkpoint
converted_tensors_size = 0

# Number of routed experts to keep per MoE layer in the sliced model
target_n_routed_experts = 64


def print_tensor_info(tensor, key, new_key=None):
    print(f"key: {key} to {new_key if new_key else key}, shape: {tensor.shape}, size: {tensor.numel() * tensor.element_size()} bytes")

def ensure_tensor_has_data(tensor):
    try:
        # Check that the tensor's data is actually accessible
        tensor[0]
        return tensor
    except Exception as e:
        print(f"Tensor needs to be rebuilt: {e}")
        # Rebuild the tensor as a contiguous copy
        # (avoids .numpy(), which does not support bfloat16)
        return tensor.contiguous().clone()

# Per-layer expert-usage statistics collected beforehand. Each entry is expected
# (inferred from the lookup below) to look like
#   {"<layer_idx>": {"experts": [expert indices, most-used first]}, ...}
with open("layer_topk_idx_distribution.json", "r") as f:
    layer_topk_idx_distribution = json.load(f)


for i, tensor_file_name in enumerate(tensor_files, 1):
    print(f"\nProcessing: {tensor_file_name} ({i}/{len(tensor_files)})")
    
    tensor_path = model_dir_path / tensor_file_name
    tensor_data = safe_open(tensor_path, framework="pt")
    converted_tensors = {}
    
    for key in tensor_data.keys():
        tensor = tensor_data.get_tensor(key)
        tensor = ensure_tensor_has_data(tensor)  # make sure the tensor's data is materialized

        # Extract the layer index from "model.layers.<N>."; some keys
        # (e.g. embeddings, final norm, lm_head) have no layer index
        layer_match = re.search(r'model\.layers\.(\d+)\.', key)
        layer_idx = int(layer_match.group(1)) if layer_match else -1

        # Keys without a layer index (layer_idx == -1) and dense layers below
        # first_k_dense_replace are copied unchanged
        if layer_idx < first_k_dense_replace:
            converted_tensors[key] = tensor.clone()
            converted_tensors_size += tensor.numel() * tensor.element_size()
            print_tensor_info(tensor, key, key)
            continue

        # Drop tensors for layers beyond num_hidden_layers from the index
        # (only matters if the layer count is also reduced)
        if layer_idx >= num_hidden_layers:
            del tensor_map_json["weight_map"][key]
            continue

        # Expert indices to keep for this layer, taken from layer_topk_idx_distribution
        if str(layer_idx) in layer_topk_idx_distribution:
            experts_list = layer_topk_idx_distribution[str(layer_idx)]["experts"][:target_n_routed_experts]
        else:
            # Fallback: pick evenly spaced expert indices
            step = n_routed_experts // target_n_routed_experts
            experts_list = list(range(0, n_routed_experts, step))[:target_n_routed_experts]
        experts_list.sort()
        experts_tensor = torch.tensor(experts_list, dtype=torch.long, device=tensor.device)
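        # Example of the remapping below: if experts_list == [3, 17, 42, ...],
        # original expert 17 becomes expert 1 in the sliced model, and row 17 of
        # the gate weight / bias becomes row 1.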

        # Routed experts: keep only the selected ones and renumber them from 0
        if ".mlp.experts." in key:
            experts_idx = int(re.search(r'\.mlp\.experts\.(\d+)\.', key).group(1))
            if experts_idx in experts_list:
                new_key = key.replace(f".mlp.experts.{experts_idx}.", f".mlp.experts.{experts_list.index(experts_idx)}.")
                converted_tensors[new_key] = tensor.clone()
                converted_tensors_size += tensor.numel() * tensor.element_size()
                print_tensor_info(tensor, key, new_key)
                tensor_map_json["weight_map"][new_key] = tensor_file_name
            else:
                print(f"skip experts: {key}")
            continue

        # Shared experts are not routed, so they are kept unchanged
        if ".mlp.shared_experts." in key:
            converted_tensors[key] = tensor.clone()
            converted_tensors_size += tensor.numel() * tensor.element_size()
            print_tensor_info(tensor, key, key)
            continue

        if ".mlp.gate.e_score_correction_bias" in key:
            # Slice the router bias from [n_routed_experts] down to [target_n_routed_experts]
            squeezed_tensor = tensor[experts_tensor].clone()
            converted_tensors[key] = squeezed_tensor
            converted_tensors_size += squeezed_tensor.numel() * squeezed_tensor.element_size()
            print_tensor_info(squeezed_tensor, key, key)
            continue

        if ".mlp.gate.weight" in key:
            # Slice the router gate weight from [n_routed_experts, hidden_size]
            # down to [target_n_routed_experts, hidden_size], keeping the selected rows
            squeezed_tensor = tensor[experts_tensor, :].clone()
            converted_tensors[key] = squeezed_tensor
            converted_tensors_size += squeezed_tensor.numel() * squeezed_tensor.element_size()
            print_tensor_info(squeezed_tensor, key, key)
            continue
        
        # Everything else (attention, norms, etc.) is copied unchanged
        converted_tensors[key] = tensor.clone()
        converted_tensors_size += tensor.numel() * tensor.element_size()
        print_tensor_info(tensor, key, key)

    save_file(converted_tensors, output_dir_path / tensor_file_name, metadata={"format": "pt"})

print("\nConversion complete!")
print(f"Total size: {converted_tensors_size / (1024**3):.2f} GB")

# Rewrite model.safetensors.index.json: drop stale weight_map entries for
# expert indices that no longer exist after slicing

old_keys = list(tensor_map_json["weight_map"].keys())
for key in old_keys:
    if ".mlp.experts." in key:
        experts_idx = int(re.search(r'\.mlp\.experts\.(\d+)\.', key).group(1))
        if experts_idx >= target_n_routed_experts:
            del tensor_map_json["weight_map"][key]


tensor_map_json["metadata"]["total_size"] = converted_tensors_size
with open(output_dir_path / "model.safetensors.index.json", "w") as f:
    json.dump(tensor_map_json, f, indent=4)
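
# Optional consistency check (a sketch, not part of the original script): every
# file referenced by the rewritten weight_map should now exist in the output dir.
missing_files = {
    file_name
    for file_name in set(tensor_map_json["weight_map"].values())
    if not (output_dir_path / file_name).exists()
}
if missing_files:
    print(f"Warning: weight_map references missing files: {sorted(missing_files)}")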

# Rewrite config.json for the sliced model
output_config_json = config_json.copy()
output_config_json["n_routed_experts"] = target_n_routed_experts
# output_config_json["num_hidden_layers"] = num_hidden_layers
output_config_json["num_experts_per_tok"] = 4  # active experts per token in the sliced model
# output_config_json["first_k_dense_replace"] = first_k_dense_replace
# output_config_json["n_shared_experts"] = n_shared_experts
# output_config_json["topk_group"] = topk_group
# output_config_json["n_group"] = n_group


with open(output_dir_path / "config.json", "w") as f:
    json.dump(output_config_json, f, indent=4)
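
# Optional smoke test (a sketch, not part of the conversion itself). Loading the
# sliced checkpoint with transformers assumes the custom DeepSeek-V3 modeling and
# tokenizer files have been copied into the output directory alongside the weights,
# and that enough memory is available for the remaining experts:
#
# from transformers import AutoModelForCausalLM, AutoTokenizer
#
# tokenizer = AutoTokenizer.from_pretrained(output_dir_name, trust_remote_code=True)
# model = AutoModelForCausalLM.from_pretrained(
#     output_dir_name,
#     torch_dtype=torch.bfloat16,
#     trust_remote_code=True,
# )
# print(model.config.n_routed_experts)  # expected: 64 in the sliced config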