Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Zero-shot topic classification with Sentence-BERT embeddings.

Embeds one sentence together with a list of candidate labels using the
same encoder, then ranks the labels by cosine similarity to the sentence
and prints them from most to least similar.
"""
# NOTE: `transformers` and `torch` must be installed outside this file
# (e.g. listed in requirements.txt). The notebook-style `!pip install ...`
# shell escape is only valid inside IPython and is a SyntaxError in a
# plain .py module.
import torch
from torch.nn import functional as F
from transformers import AutoTokenizer, AutoModel

MODEL_NAME = 'deepset/sentence_bert'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)

sentence = 'Who are you voting for in 2020?'
labels = ['business', 'art & culture', 'politics']

# Encode the sentence and every label as one batch, padded to the longest
# item. Row 0 is the sentence; rows 1.. are the labels, in `labels` order.
inputs = tokenizer([sentence] + labels,
                   return_tensors='pt',
                   padding=True)
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']

# Inference only: skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    # Element 0 of the model output holds the per-token hidden states.
    output = model(input_ids, attention_mask=attention_mask)[0]

# Mean-pool over the sequence dimension to get sequence-level
# representations. The mean is weighted by the attention mask so that
# padding positions (present on the shorter label rows) do not dilute
# the embedding.
token_mask = attention_mask.unsqueeze(-1).to(output.dtype)
token_counts = token_mask.sum(dim=1).clamp(min=1)  # guard against /0
pooled = (output * token_mask).sum(dim=1) / token_counts
sentence_rep = pooled[:1]
label_reps = pooled[1:]

# Now find the labels with the highest cosine similarities to the
# sentence; `sentence_rep` (1 row) broadcasts against every label row.
similarities = F.cosine_similarity(sentence_rep, label_reps)
closest = similarities.argsort(descending=True)
for ind in closest.tolist():
    print(f'label: {labels[ind]} \t similarity: {similarities[ind].item()}')