sasha HF staff committed on
Commit
b8be36c
1 Parent(s): 5630612

Create app.py

Browse files

adding initial functionalities

Files changed (1) hide show
  1. app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from evaluate import evaluator
3
+ import evaluate
4
+ import datasets
5
+ import sentencepiece
6
+ from huggingface_hub import HfApi, ModelFilter
7
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
+ from transformers import AutoTokenizer, AutoModelForMaskedLM
9
+ from transformers import pipeline
10
+ import matplotlib.pyplot as plt
11
+
12
+ st.title("Metric Compare")
13
+ # Step 1: let the user pick a text-classification dataset (top 20 by downloads).
14
+ st.markdown("## Choose the dataset you want to use for the comparison:")
15
+
16
+ api = HfApi()
17
+ dataset_infos = api.list_datasets(filter="task_categories:text-classification", sort="downloads", direction=-1, limit=20)
18
+ # Named `dataset_infos` so the imported `datasets` module is not shadowed; selectbox takes a label first, then the options (repo ids).
19
+ dset = st.selectbox("Choose your dataset", [info.id for info in dataset_infos])
20
+
21
+ st.markdown("## Now select up to 5 models to compare their performance:")
22
+ # NOTE(review): the filter is fixed to imdb and ignores `dset`; confirm the Hub tag format before wiring the chosen dataset in.
23
+ models = api.list_models(filter="trained_dataset:imdb", sort="downloads", direction=-1, limit=20)
24
+ # Offer repo ids (plain strings) instead of ModelInfo objects so the widget shows readable names and returns usable ids.
25
+ options = st.multiselect(
26
+ 'Choose your models',
27
+ [model.modelId for model in models])
28
+
29
+ _DISABLED_EVAL_PROTOTYPE = """
30
+ tokenizer1 = AutoTokenizer.from_pretrained("lvwerra/distilbert-imdb")
31
+
32
+ model1 = AutoModelForSequenceClassification.from_pretrained("lvwerra/distilbert-imdb")
33
+
34
+ tokenizer2 = AutoTokenizer.from_pretrained("sahn/distilbert-base-uncased-finetuned-imdb")
35
+
36
+ model2 = AutoModelForSequenceClassification.from_pretrained("sahn/distilbert-base-uncased-finetuned-imdb")
37
+
38
+ tokenizer3 = AutoTokenizer.from_pretrained("aychang/roberta-base-imdb")
39
+
40
+ model3 = AutoModelForSequenceClassification.from_pretrained("aychang/roberta-base-imdb")
41
+
42
+ tokenizer4 = AutoTokenizer.from_pretrained("Sreevishnu/funnel-transformer-small-imdb")
43
+ model4 = AutoModelForSequenceClassification.from_pretrained("Sreevishnu/funnel-transformer-small-imdb")
44
+
45
+ tokenizer5 = AutoTokenizer.from_pretrained("RANG012/SENATOR")
46
+
47
+ model5 = AutoModelForSequenceClassification.from_pretrained("RANG012/SENATOR")
48
+
49
+ accuracy = evaluate.load("accuracy")
50
+ f1 = evaluate.load('f1')
51
+
52
+ data = datasets.load_dataset("imdb", split="test").shuffle().select(range(1000))
53
+
54
+ eval = evaluator("text-classification")
55
+
56
+ pipe1 = pipeline("text-classification", model=model1, tokenizer= tokenizer1, device=0)
57
+ res_accuracy1 = eval.compute(model_or_pipeline=pipe1, data=data, metric=accuracy,
58
+ label_mapping={"NEGATIVE": 0, "POSITIVE": 1},)
59
+ res_f11 = eval.compute(model_or_pipeline=pipe1, data=data, metric=f1,
60
+ label_mapping={"NEGATIVE": 0, "POSITIVE": 1},)
61
+ print({**res_accuracy1, **res_f11})
62
+
63
+ pipe2 = pipeline("text-classification", model=model2, tokenizer= tokenizer2, device=0)
64
+ res_accuracy2 = eval.compute(model_or_pipeline=pipe2, data=data, metric=accuracy,
65
+ label_mapping={"LABEL_0": 0, "LABEL_1": 1},)
66
+ res_f12 = eval.compute(model_or_pipeline=pipe2, data=data, metric=f1,
67
+ label_mapping={"LABEL_0": 0, "LABEL_1": 1},)
68
+ print({**res_accuracy2, **res_f12})
69
+
70
+ pipe3 = pipeline("text-classification", model=model3, tokenizer= tokenizer3, device=0)
71
+ res_accuracy3 = eval.compute(model_or_pipeline=pipe3, data=data, metric=accuracy,
72
+ label_mapping={"neg": 0, "pos": 1},)
73
+ res_f13 = eval.compute(model_or_pipeline=pipe3, data=data, metric=f1,
74
+ label_mapping={"neg": 0, "pos": 1},)
75
+ print({**res_accuracy3, **res_f13})
76
+
77
+ pipe4 = pipeline("text-classification", model=model4, tokenizer= tokenizer4, device=0)
78
+ res_accuracy4 = eval.compute(model_or_pipeline=pipe4, data=data, metric=accuracy,
79
+ label_mapping={"LABEL_0": 0, "LABEL_1": 1},)
80
+ res_f14 = eval.compute(model_or_pipeline=pipe4, data=data, metric=f1,
81
+ label_mapping={"LABEL_0": 0, "LABEL_1": 1},)
82
+ print({**res_accuracy4, **res_f14})
83
+
84
+ pipe5 = pipeline("text-classification", model=model5, tokenizer= tokenizer5, device=0)
85
+ res_accuracy5 = eval.compute(model_or_pipeline=pipe5, data=data, metric=accuracy,
86
+ label_mapping={"LABEL_0": 0, "LABEL_1": 1},)
87
+ res_f15 = eval.compute(model_or_pipeline=pipe5, data=data, metric=f1,
88
+ label_mapping={"LABEL_0": 0, "LABEL_1": 1},)
89
+ print({**res_accuracy5, **res_f15})
90
+
91
+ plt.plot(res_accuracy1['accuracy'], res_f11['f1'], marker='o', markersize=6, color="red")
92
+ plt.annotate('distilbert', xy=(res_accuracy1['accuracy']+0.001, res_f11['f1']))
93
+ plt.plot(res_accuracy2['accuracy'], res_f12['f1'], marker='o', markersize=6, color="blue")
94
+ plt.annotate('distilbert-base-uncased-finetuned', xy=(res_accuracy2['accuracy']+0.001, res_f12['f1']))
95
+ plt.plot(res_accuracy3['accuracy'], res_f13['f1'], marker='o', markersize=6, color="green")
96
+ plt.annotate('roberta-base', xy=(res_accuracy3['accuracy']-0.009, res_f13['f1']))
97
+ plt.plot(res_accuracy4['accuracy'], res_f14['f1'], marker='o', markersize=6, color="purple")
98
+ plt.annotate('funnel-transformer-small', xy=(res_accuracy4['accuracy']-0.015, res_f14['f1']))
99
+ plt.plot(res_accuracy5['accuracy'], res_f15['f1'], marker='o', markersize=6, color="black")
100
+ plt.annotate('SENATOR', xy=(res_accuracy5['accuracy']+0.001, res_f15['f1']))
101
+
102
+ plt.xlabel('Accuracy')
103
+ plt.ylabel('F1 Score')
104
+ #plt.xlim([0.9, 1.0])
105
+ #plt.ylim([0.9, 1.0])
106
+ plt.title('Comparing the Models')
107
+ """  # Disabled evaluation prototype; bound to a name so Streamlit "magic" does not render this bare string on the page.