# Slice the MoE layers of DeepSeek-V3 down to `target_n_routed_experts` routed experts
# per layer, rewriting the safetensors shards, the weight index, and the config.
from transformers import AutoModelForCausalLM, AutoTokenizer
from safetensors.torch import safe_open, save_file
import torch
import os
from pathlib import Path
import json
import re

model_dir_name = "DeepSeek-V3-bf16"
model_dir_path = Path(model_dir_name)
output_dir_name = "DeepSeek-V3-slice"
output_dir_path = Path(output_dir_name)
os.makedirs(output_dir_name, exist_ok=True)

try:
    tensor_map_json = json.load(open(model_dir_path / "model.safetensors.index.json"))
    weight_map = tensor_map_json["weight_map"]
except FileNotFoundError:
    print("Model index file not found")
    raise

tensor_files = sorted(set(weight_map.values()))
print(f"Number of files to convert: {len(tensor_files)}")

try:
    config_json = json.load(open(model_dir_path / "config.json"))
except FileNotFoundError:
    print("Model config file not found")
    raise

# total number of routed experts
n_routed_experts = int(config_json["n_routed_experts"])
# number of transformer layers
num_hidden_layers = int(config_json["num_hidden_layers"])
# number of routed experts activated per token
num_experts_per_tok = int(config_json["num_experts_per_tok"])
# layers from this index onward use MoE instead of a dense MLP
first_k_dense_replace = int(config_json["first_k_dense_replace"])

converted_tensors_size = 0
# number of routed experts to keep per MoE layer
target_n_routed_experts = 64


def print_tensor_info(tensor, key, new_key=None):
    print(f"key: {key} to {new_key if new_key else key}, "
          f"shape: {tensor.shape}, size: {tensor.numel() * tensor.element_size()} bytes")


def ensure_tensor_has_data(tensor):
    try:
        # Check that the tensor data is actually accessible
        tensor[0]
        return tensor
    except Exception as e:
        print(f"Tensor needs to be rebuilt: {e}")
        # Rebuild the tensor explicitly
        return torch.tensor(tensor.cpu().numpy(), dtype=tensor.dtype)


# Per-layer expert selection: {"<layer_idx>": {"experts": [expert indices to keep, highest priority first]}}
with open("layer_topk_idx_distribution.json", "r") as f:
    layer_topk_idx_distribution = json.load(f)

for i, tensor_file_name in enumerate(tensor_files, 1):
    print(f"\nProcessing: {tensor_file_name} ({i}/{len(tensor_files)})")
    tensor_path = model_dir_path / tensor_file_name

    converted_tensors = {}
    with safe_open(tensor_path, framework="pt") as tensor_data:
        for key in tensor_data.keys():
            tensor = tensor_data.get_tensor(key)
            tensor = ensure_tensor_has_data(tensor)  # make sure the tensor data is materialized

            # Extract the layer index from "model.layers.<n>."; some keys have none
            match = re.search(r'model\.layers\.(\d+)\.', key)
            layer_idx = int(match.group(1)) if match else -1

            # Keys without a layer index (embeddings, lm_head, final norm) and the
            # leading dense layers are copied unchanged
            if layer_idx < first_k_dense_replace:
                converted_tensors[key] = tensor.clone()
                converted_tensors_size += tensor.numel() * tensor.element_size()
                print_tensor_info(tensor, key, key)
                continue

            if layer_idx >= num_hidden_layers:
                del tensor_map_json["weight_map"][key]
                continue

            # Get the expert indices to keep for this layer from layer_topk_idx_distribution;
            # fall back to a uniform stride over all experts if the layer is missing
            if str(layer_idx) in layer_topk_idx_distribution:
                experts_list = layer_topk_idx_distribution[str(layer_idx)]["experts"][:target_n_routed_experts]
            else:
                step = n_routed_experts // target_n_routed_experts
                experts_list = list(range(0, n_routed_experts, step))[:target_n_routed_experts]
            experts_list.sort()
            experts_tensor = torch.tensor(experts_list, dtype=torch.long, device=tensor.device)

            # Routed experts: keep only the selected ones and renumber them 0..target_n_routed_experts-1
            if ".mlp.experts." in key:
                experts_idx = int(re.search(r'\.mlp\.experts\.(\d+)\.', key).group(1))
                if experts_idx in experts_list:
                    new_key = key.replace(f".mlp.experts.{experts_idx}.",
                                          f".mlp.experts.{experts_list.index(experts_idx)}.")
                    converted_tensors[new_key] = tensor.clone()
                    converted_tensors_size += tensor.numel() * tensor.element_size()
                    print_tensor_info(tensor, key, new_key)
                    tensor_map_json["weight_map"][new_key] = tensor_file_name
                else:
                    print(f"skip experts: {key}")
                continue

            # Shared experts are kept as-is
            if ".mlp.shared_experts." in key:
                converted_tensors[key] = tensor.clone()
                converted_tensors_size += tensor.numel() * tensor.element_size()
                print_tensor_info(tensor, key, key)
                continue

            if ".mlp.gate.e_score_correction_bias" in key:
                # Slice the [256] tensor down to [target_n_routed_experts]
                squeezed_tensor = tensor[experts_tensor].clone()
                converted_tensors[key] = squeezed_tensor
                converted_tensors_size += squeezed_tensor.numel() * squeezed_tensor.element_size()
                print_tensor_info(squeezed_tensor, key, key)
                continue

            if ".mlp.gate.weight" in key:
                # Slice the [256, 7168] tensor down to [target_n_routed_experts, 7168]
                squeezed_tensor = tensor[experts_tensor, :].clone()
                converted_tensors[key] = squeezed_tensor
                converted_tensors_size += squeezed_tensor.numel() * squeezed_tensor.element_size()
                print_tensor_info(squeezed_tensor, key, key)
                continue

            # Everything else (attention, norms, ...) is copied unchanged
            converted_tensors[key] = tensor.clone()
            converted_tensors_size += tensor.numel() * tensor.element_size()
            print_tensor_info(tensor, key, key)

    save_file(converted_tensors, output_dir_path / tensor_file_name, metadata={"format": "pt"})

print("\nConversion complete!")
print(f"Total size: {converted_tensors_size / (1024**3):.2f} GB")

# Rewrite model.safetensors.index.json: drop weight_map entries for removed experts
old_keys = list(tensor_map_json["weight_map"].keys())
for key in old_keys:
    if ".mlp.experts." in key:
        experts_idx = int(re.search(r'\.mlp\.experts\.(\d+)\.', key).group(1))
        if experts_idx >= target_n_routed_experts:
            del tensor_map_json["weight_map"][key]

tensor_map_json["metadata"]["total_size"] = converted_tensors_size
with open(output_dir_path / "model.safetensors.index.json", "w") as f:
    json.dump(tensor_map_json, f, indent=4)

# Rewrite config.json for the sliced model
output_config_json = config_json.copy()
output_config_json["n_routed_experts"] = target_n_routed_experts
# output_config_json["num_hidden_layers"] = num_hidden_layers
output_config_json["num_experts_per_tok"] = 4  # also reduce the number of active experts per token
# output_config_json["first_k_dense_replace"] = first_k_dense_replace
# output_config_json["n_shared_experts"] = n_shared_experts
# output_config_json["topk_group"] = topk_group
# output_config_json["n_group"] = n_group
with open(output_dir_path / "config.json", "w") as f:
    json.dump(output_config_json, f, indent=4)
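# ---------------------------------------------------------------------------
# A minimal sketch of the input this script expects (not part of the conversion
# itself): "layer_topk_idx_distribution.json" is assumed to map each MoE layer
# index to an "experts" list ordered by how often the router selected each
# expert, e.g. collected by logging topk_idx during calibration inference.
# The concrete values below are hypothetical; writing a uniform fallback like
# this would let the script run even without routing statistics.
#
#   {
#       "3": {"experts": [17, 203, 88, 5, ...]},
#       "4": {"experts": [42, 7, 199, 230, ...]}
#   }
#
# import json
# n_routed_experts = 256      # DeepSeek-V3 routed experts per MoE layer
# first_k_dense_replace = 3   # layers 0..2 are dense
# num_hidden_layers = 61
# fallback = {
#     str(layer_idx): {"experts": list(range(n_routed_experts))}
#     for layer_idx in range(first_k_dense_replace, num_hidden_layers)
# }
# with open("layer_topk_idx_distribution.json", "w") as f:
#     json.dump(fallback, f, indent=4)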