Commit
·
f968140
1
Parent(s):
c7cadcc
Update README.md
Browse files
README.md
CHANGED
@@ -35,9 +35,14 @@ pip install transformers
|
|
35 |
```
|
36 |
|
37 |
> Then, you can load the model and tokenizer using the following code:
|
38 |
-
```
|
39 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
40 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
41 |
MODEL = "Karim-Gamal/BERT-base-finetuned-emojis-IID-Fed"
|
42 |
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
43 |
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
|
@@ -46,6 +51,15 @@ model = AutoModelForSequenceClassification.from_pretrained(MODEL)
|
|
46 |
> Once you have the tokenizer and model, you can preprocess your text and pass it to the model for prediction:
|
47 |
|
48 |
```python
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
text = "Hello world"
|
50 |
text = preprocess(text)
|
51 |
encoded_input = tokenizer(text, return_tensors='pt')
|
@@ -56,6 +70,14 @@ scores = output[0][0].detach().numpy()
|
|
56 |
> The scores variable contains the model's raw output scores (logits, not normalized probabilities) for each of the possible emoji labels. To get the top k predictions, you can use the following code:
|
57 |
|
58 |
```python
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
k = 3 # number of top predictions to show
|
60 |
ranking = np.argsort(scores)
|
61 |
ranking = ranking[::-1]
|
|
|
35 |
```
|
36 |
|
37 |
> Then, you can load the model and tokenizer using the following code:
|
38 |
+
```python
|
39 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
40 |
import numpy as np
|
41 |
+
import urllib.request
|
42 |
+
import csv
|
43 |
+
```
|
44 |
+
|
45 |
+
```python
|
46 |
MODEL = "Karim-Gamal/BERT-base-finetuned-emojis-IID-Fed"
|
47 |
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
48 |
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
|
|
|
51 |
> Once you have the tokenizer and model, you can preprocess your text and pass it to the model for prediction:
|
52 |
|
53 |
```python
|
54 |
+
# Preprocess text (username and link placeholders)
|
55 |
+
def preprocess(text):
|
56 |
+
new_text = []
|
57 |
+
for t in text.split(" "):
|
58 |
+
t = '@user' if t.startswith('@') and len(t) > 1 else t
|
59 |
+
t = 'http' if t.startswith('http') else t
|
60 |
+
new_text.append(t)
|
61 |
+
return " ".join(new_text)
|
62 |
+
|
63 |
text = "Hello world"
|
64 |
text = preprocess(text)
|
65 |
encoded_input = tokenizer(text, return_tensors='pt')
|
|
|
70 |
> The scores variable contains the model's raw output scores (logits, not normalized probabilities) for each of the possible emoji labels. To get the top k predictions, you can use the following code:
|
71 |
|
72 |
```python
|
73 |
+
# download label mapping
|
74 |
+
labels=[]
|
75 |
+
mapping_link = "https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/emoji/mapping.txt"
|
76 |
+
with urllib.request.urlopen(mapping_link) as f:
|
77 |
+
html = f.read().decode('utf-8').split("\n")
|
78 |
+
csvreader = csv.reader(html, delimiter='\t')
|
79 |
+
labels = [row[1] for row in csvreader if len(row) > 1]
|
80 |
+
|
81 |
k = 3 # number of top predictions to show
|
82 |
ranking = np.argsort(scores)
|
83 |
ranking = ranking[::-1]
|