File size: 642 Bytes
a5fb347
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
from refactor_analysis import RefactorAnalysis
from transformers import AutoTokenizer, AutoModel
import torch
# Compute a mean-pooled CodeBERTa embedding for each file version produced by
# the refactor-analysis parent/child commit map.
model_name = "huggingface/CodeBERTa-small-v1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
# Inference only: disable dropout so repeated runs yield identical embeddings.
model.eval()

# Tokenize every file's content. truncation=True caps sequences at the model's
# maximum length (512 for CodeBERTa) — without it, any file longer than that
# overflows the position embeddings and crashes the forward pass.
tokenized_inputs = [
    tokenizer(file_content, return_tensors="pt", truncation=True)
    for file_content in RefactorAnalysis()._parent_child_commit_map()
]

with torch.no_grad():  # embeddings only — no autograd graph needed
    # `inputs`, not `input`: avoid shadowing the builtin.
    outputs = [model(**inputs) for inputs in tokenized_inputs]
    # Mean over the sequence axis (dim=1) pools per-token vectors into one
    # fixed-size embedding per file; squeeze drops the batch dim of 1.
    embeddings = [out.last_hidden_state.mean(dim=1).squeeze() for out in outputs]

print(embeddings[0].shape)