|
|
|
|
|
|
|
|
|
|
|
""" |
|
File Description: |
|
ernie3.0 series model conversion based on paddlenlp repository |
|
ernie2.0 series model conversion based on paddlenlp repository |
|
official repo: https://github.com/PaddlePaddle/PaddleNLP/tree/develop/model_zoo |
|
Author: nghuyong liushu |
|
Mail: nghuyong@163.com 1554987494@qq.com |
|
Created Time: 2022/8/17 |
|
""" |
|
import collections |
|
import os |
|
import json |
|
import paddle.fluid.dygraph as D |
|
import torch |
|
from paddle import fluid |
|
import numpy as np |
|
|
|
def build_params_map(attention_num=12):
    """
    Build the parameter-name map from the PaddlePaddle ERNIE checkpoint to the
    PyTorch state dict.

    Every name currently maps to itself; the map therefore acts as the ordered
    list of parameters that are kept during conversion.

    :param attention_num: number of encoder layers to generate entries for
    :return: ``collections.OrderedDict`` of source name -> target name, ordered
        as embeddings, encoder layers 0..attention_num-1, pooler
    """
    embedding_names = (
        'embeddings.word_embeddings.weight',
        'embeddings.position_embeddings.weight',
        'embeddings.layer_norm.weight',
        'embeddings.layer_norm.bias',
    )
    # Sub-modules of one encoder layer, in the order their entries are emitted.
    layer_modules = (
        'self_attn.q_proj',
        'self_attn.k_proj',
        'self_attn.v_proj',
        'self_attn.out_proj',
        'norm1',
        'linear1',
        'linear2',
        'norm2',
    )
    pooler_names = (
        'pooler.dense.weight',
        'pooler.dense.bias',
    )

    ordered_names = list(embedding_names)
    for layer_idx in range(attention_num):
        for module in layer_modules:
            # Each sub-module contributes its weight first, then its bias.
            for param_kind in ('weight', 'bias'):
                ordered_names.append(f'encoder.layers.{layer_idx}.{module}.{param_kind}')
    ordered_names.extend(pooler_names)

    return collections.OrderedDict((name, name) for name in ordered_names)
|
|
|
|
|
def extract_and_convert(input_dir, output_dir):
    """
    Extract a PaddlePaddle checkpoint and convert it into PyTorch files.

    Reads ``config.json``, ``vocab.txt`` and ``model_state.pdparams`` from
    ``input_dir`` and writes ``config.json``, ``vocab.txt`` and
    ``pytorch_model.bin`` into ``output_dir``.

    :param input_dir: directory holding the PaddlePaddle model files
    :param output_dir: directory receiving the converted files; it is created
        when it does not exist yet
    :return: None
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    print('=' * 20 + 'save config file' + '=' * 20)
    config = _convert_config(input_dir, output_dir)

    print('=' * 20 + 'save vocab file' + '=' * 20)
    _convert_vocab(input_dir, output_dir)

    print('=' * 20 + 'extract weights' + '=' * 20)
    _convert_weights(input_dir, output_dir, config['num_hidden_layers'])


def _convert_config(input_dir, output_dir):
    """Copy ``config.json`` to ``output_dir`` with ``layer_norm_eps`` set to 1e-5 and return it."""
    # Context managers guarantee both handles are closed (and the output
    # flushed) instead of relying on garbage collection.
    with open(os.path.join(input_dir, 'config.json'), 'rt', encoding='utf-8') as f:
        config = json.load(f)
    config['layer_norm_eps'] = 1e-5
    with open(os.path.join(output_dir, 'config.json'), 'wt', encoding='utf-8') as f:
        json.dump(config, f, indent=4)
    return config


def _convert_vocab(input_dir, output_dir):
    """Write ``vocab.txt`` to ``output_dir`` keeping only the first tab-separated field of each line."""
    with open(os.path.join(input_dir, 'vocab.txt'), 'rt', encoding='utf-8') as f:
        words = [line.split('\t')[0] for line in f.read().splitlines()]
    with open(os.path.join(output_dir, 'vocab.txt'), 'wt', encoding='utf-8') as f:
        f.writelines(word + "\n" for word in words)


def _convert_weights(input_dir, output_dir, num_layers):
    """Load ``model_state.pdparams``, convert the mapped parameters and save ``pytorch_model.bin``."""
    state_dict = collections.OrderedDict()
    weight_map = build_params_map(attention_num=num_layers)
    # NOTE(review): the original indentation of this loop relative to the guard
    # could not be recovered; it is kept inside the guard as the conservative choice.
    with fluid.dygraph.guard():
        paddle_paddle_params, _ = D.load_dygraph(os.path.join(input_dir, 'model_state.pdparams'))
        for weight_name, weight_value in paddle_paddle_params.items():
            # The original had two branches (linear1/linear2 vs. every other
            # encoder weight) that both called transpose(); they are merged here.
            # Presumably needed because Paddle and PyTorch linear layers store
            # their weight matrices with swapped axes -- TODO confirm.
            # NOTE(review): 'pooler.dense.weight' does not contain "encoder" and
            # is therefore NOT transposed -- confirm this is intended.
            if 'weight' in weight_name and 'encoder' in weight_name:
                weight_value = weight_value.transpose()

            if weight_name not in weight_map:
                print('=' * 20, '[SKIP]', weight_name, '=' * 20)
                continue
            target_name = weight_map[weight_name]
            state_dict[target_name] = torch.FloatTensor(weight_value)
            print(weight_name, '->', target_name, weight_value.shape)
    torch.save(state_dict, os.path.join(output_dir, "pytorch_model.bin"))
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: convert the checkpoint in the Paddle directory and
    # write the PyTorch files into the torch directory.
    paddle_model_dir = "./ernie_m_large_paddle/"
    torch_model_dir = "./ernie_m_large_torch/"
    extract_and_convert(paddle_model_dir, torch_model_dir)