# Assemble a ConcatModel from two pretrained checkpoints and save it,
# together with a BERT tokenizer, into a single output directory.
from transformers import BertTokenizer

from modeling_arctic_s_bge_small import ConcatModel, ConcatModelConfig

# Instantiate the model from its default configuration; the weights are
# filled in by the load_weights_from_automodels call below.
config = ConcatModelConfig()
model = ConcatModel(config)

# Pull weights from the two source checkpoints. The two lists are given in
# the same order, one has_pooling_layer entry per checkpoint.
# NOTE(review): the exact meaning of has_pooling_layer is defined in
# modeling_arctic_s_bge_small — confirm there before changing it.
model.load_weights_from_automodels(
    in_models=['Snowflake/snowflake-arctic-embed-s', 'BAAI/bge-small-en-v1.5'],
    has_pooling_layer=[True, True],
)

# The tokenizer is built from a local vocabulary file; 'vocab.txt' is
# resolved relative to the current working directory.
tokenizer = BertTokenizer(vocab_file='vocab.txt')

# Write the model and the tokenizer to the same location so they can be
# loaded back together.
output_path = 'model'
model.save_pretrained(output_path)
tokenizer.save_pretrained(output_path)
print(f'Model saved as {output_path}')