# Snapshot of revision f3581c2 (file size: 6,421 bytes).
# Copied from https://github.com/nghuyong/ERNIE-Pytorch/blob/master/convert.py
#!/usr/bin/env python
# encoding: utf-8
"""
File Description:
ernie3.0 series model conversion based on paddlenlp repository
ernie2.0 series model conversion based on paddlenlp repository
official repo: https://github.com/PaddlePaddle/PaddleNLP/tree/develop/model_zoo
Author: nghuyong liushu
Mail: nghuyong@163.com 1554987494@qq.com
Created Time: 2022/8/17
"""
import collections
import os
import json
import paddle.fluid.dygraph as D
import torch
from paddle import fluid
import numpy as np
def build_params_map(attention_num=12):
    """
    Build the parameter-name map from the Paddle checkpoint to the PyTorch state dict.

    Every mapped name is currently kept unchanged (key == value), so the map
    effectively acts as an ordered whitelist: ``extract_and_convert`` skips any
    checkpoint entry whose name is not a key of this map.

    Not included in the map: the token_type / task_type embeddings and the
    ``cls.predictions.*`` head.

    :param attention_num: number of encoder layers to generate entries for
    :return: ``collections.OrderedDict`` of source name -> target name, ordered as
        embeddings, encoder layers ``0 .. attention_num - 1``, pooler
    """
    weight_map = collections.OrderedDict()

    # embeddings
    embedding_names = (
        'embeddings.word_embeddings.weight',
        'embeddings.position_embeddings.weight',
        'embeddings.layer_norm.weight',
        'embeddings.layer_norm.bias',
    )
    for name in embedding_names:
        weight_map[name] = name

    # add attention layers: each sub-layer contributes a weight followed by a bias
    sublayers = (
        'self_attn.q_proj',
        'self_attn.k_proj',
        'self_attn.v_proj',
        'self_attn.out_proj',
        'norm1',
        'linear1',
        'linear2',
        'norm2',
    )
    for i in range(attention_num):
        for sublayer in sublayers:
            for param in ('weight', 'bias'):
                name = f'encoder.layers.{i}.{sublayer}.{param}'
                weight_map[name] = name

    # pooler
    for name in ('pooler.dense.weight', 'pooler.dense.bias'):
        weight_map[name] = name

    return weight_map
def _needs_transpose(weight_name):
    """
    Tell whether a checkpoint entry is transposed before being stored for PyTorch.

    NOTE(review): the nesting of the trailing ``else: transpose`` branch in the
    previous version of this logic could not be determined reliably. This
    implementation assumes it belonged to the ``linear1``/``linear2`` check,
    i.e. every entry whose name contains both ``'weight'`` and ``'encoder'`` is
    transposed, while embeddings, pooler and all biases are left untouched.
    Confirm against a reference checkpoint before relying on the output.

    :param weight_name: parameter name as found in the Paddle checkpoint
    :return: True if the value should be transposed
    """
    return 'weight' in weight_name and 'encoder' in weight_name


def extract_and_convert(input_dir, output_dir):
    """
    Extract a Paddle checkpoint and convert it to PyTorch files.

    Reads ``config.json``, ``vocab.txt`` and ``model_state.pdparams`` from
    ``input_dir`` and writes ``config.json``, ``vocab.txt`` and
    ``pytorch_model.bin`` to ``output_dir``.

    :param input_dir: directory holding the Paddle model files
    :param output_dir: destination directory; created if it does not exist
    :return: None
    """
    os.makedirs(output_dir, exist_ok=True)

    print('=' * 20 + 'save config file' + '=' * 20)
    with open(os.path.join(input_dir, 'config.json'), 'rt', encoding='utf-8') as f:
        config = json.load(f)
    # The exported config always carries layer_norm_eps = 1e-5, whatever the input had.
    config['layer_norm_eps'] = 1e-5
    with open(os.path.join(output_dir, 'config.json'), 'wt', encoding='utf-8') as f:
        json.dump(config, f, indent=4)

    print('=' * 20 + 'save vocab file' + '=' * 20)
    with open(os.path.join(input_dir, 'vocab.txt'), 'rt', encoding='utf-8') as f:
        # Keep only the token itself: anything after the first tab on a line is dropped.
        words = [line.split('\t')[0] for line in f.read().splitlines()]
    with open(os.path.join(output_dir, 'vocab.txt'), 'wt', encoding='utf-8') as f:
        f.writelines(word + "\n" for word in words)

    print('=' * 20 + 'extract weights' + '=' * 20)
    state_dict = collections.OrderedDict()
    weight_map = build_params_map(attention_num=config['num_hidden_layers'])
    with fluid.dygraph.guard():
        paddle_paddle_params, _ = D.load_dygraph(os.path.join(input_dir, 'model_state.pdparams'))
    for weight_name, weight_value in paddle_paddle_params.items():
        # Entries that are not in the map are reported and left out of the output.
        if weight_name not in weight_map:
            print('=' * 20, '[SKIP]', weight_name, '=' * 20)
            continue
        if _needs_transpose(weight_name):
            weight_value = weight_value.transpose()
        state_dict[weight_map[weight_name]] = torch.FloatTensor(weight_value)
        print(weight_name, '->', weight_map[weight_name], weight_value.shape)
    torch.save(state_dict, os.path.join(output_dir, "pytorch_model.bin"))
if __name__ == '__main__':
    # Script entry point: convert the checkpoint in ./ernie_m_large_paddle/
    # and write the PyTorch files to ./ernie_m_large_torch/.
    extract_and_convert("./ernie_m_large_paddle/", "./ernie_m_large_torch/")