sasha (HF staff) committed
Commit d276596
1 Parent(s): 646dc33

Update app.py


a bunch of changes that will probably crash

Files changed (1)
  1. app.py +79 -28
app.py CHANGED
@@ -10,48 +10,99 @@ import matplotlib.pyplot as plt
  
  st.title("Metric Compare")
  
- st.markdown("## Choose the dataset you want to use for the comparison:")
  
  api = HfApi()
  datasets = [d.id for d in api.list_datasets(filter="task_categories:text-classification", sort = "downloads", direction=-1, limit = 20)]
  
  dset = st.selectbox('Choose a dataset from the Hub', options=datasets)
  
- st.markdown("## Now select up to 5 models to compare their performance:")
-
- filt = ModelFilter(trained_dataset=dset)
- models = [m.modelId for m in api.list_models(filter=filt, sort = "downloads", direction=-1, limit = 20)]
-
- model = st.multiselect(
-     'Choose the models that have been trained/finetuned on this dataset',
-     options=models)
-
- """
- tokenizer1 = AutoTokenizer.from_pretrained("lvwerra/distilbert-imdb")
- model1 = AutoModelForSequenceClassification.from_pretrained("lvwerra/distilbert-imdb")
-
- tokenizer2 = AutoTokenizer.from_pretrained("sahn/distilbert-base-uncased-finetuned-imdb")
- model2 = AutoModelForSequenceClassification.from_pretrained("sahn/distilbert-base-uncased-finetuned-imdb")
-
- tokenizer3 = AutoTokenizer.from_pretrained("aychang/roberta-base-imdb")
- model3 = AutoModelForSequenceClassification.from_pretrained("aychang/roberta-base-imdb")
-
- tokenizer4 = AutoTokenizer.from_pretrained("Sreevishnu/funnel-transformer-small-imdb")
- model4 = AutoModelForSequenceClassification.from_pretrained("Sreevishnu/funnel-transformer-small-imdb")
-
- tokenizer5 = AutoTokenizer.from_pretrained("RANG012/SENATOR")
- model5 = AutoModelForSequenceClassification.from_pretrained("RANG012/SENATOR")
-
- accuracy = evaluate.load("accuracy")
- f1 = evaluate.load('f1')
-
- data = datasets.load_dataset("imdb", split="test").shuffle().select(range(1000))
-
- eval = evaluator("text-classification")
  
  pipe1 = pipeline("text-classification", model=model1, tokenizer= tokenizer1, device=0)
  res_accuracy1 = eval.compute(model_or_pipeline=pipe1, data=data, metric=accuracy,
  
  st.title("Metric Compare")
  
+ st.markdown("### Choose the dataset you want to use for the comparison:")
  
  api = HfApi()
  datasets = [d.id for d in api.list_datasets(filter="task_categories:text-classification", sort = "downloads", direction=-1, limit = 20)]
  
  dset = st.selectbox('Choose a dataset from the Hub', options=datasets)
  
+ dset_split = st.selectbox('Choose a dataset split for evaluation', options=dset.keys())
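As written, `dset` is the dataset id string returned by `st.selectbox`, so `dset.keys()` will raise an `AttributeError` rather than listing splits. A minimal sketch of one way to populate the split selector, assuming the `datasets` library's `get_dataset_split_names` helper is available in the installed version:

```python
import streamlit as st
from datasets import get_dataset_split_names

# dset is the dataset id chosen above, e.g. "imdb"
try:
    available_splits = get_dataset_split_names(dset)  # e.g. ["train", "test", "unsupervised"]
except Exception:
    available_splits = ["train", "validation", "test"]  # fallback guess if split metadata can't be fetched

dset_split = st.selectbox('Choose a dataset split for evaluation', options=available_splits)
```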
  
+ st.markdown("### Now select up to 5 models to compare their performance:")
  
+ filt = ModelFilter(trained_dataset=dset)
+ all_models = [m.modelId for m in api.list_models(filter=filt, sort = "downloads", direction=-1, limit = 20) if 't5' not in model.tags]
  
+ models = st.multiselect(
+     'Choose the models that have been trained/finetuned on this dataset',
+     options=all_models)
+
+ button = st.button("Print Models",disabled=False)
+
+ if button :
+     if len(location) < 6:
+         st.write(models)
+     else:
+         st.warning("Please select at most 5 models")
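Two names in this block look like they will raise `NameError` at runtime: the list comprehension filters on `model.tags` instead of `m.tags`, and the length check reads `len(location)` instead of `len(models)`. A corrected sketch of the same step; the `max_selections` argument is assumed to be available in the installed Streamlit version:

```python
# Filter out T5-style models by inspecting each ModelInfo's own tags (m, not an undefined `model`).
filt = ModelFilter(trained_dataset=dset)
all_models = [
    m.modelId
    for m in api.list_models(filter=filt, sort="downloads", direction=-1, limit=20)
    if "t5" not in m.tags
]

# Let the widget enforce the 5-model cap instead of checking len(models) afterwards.
models = st.multiselect(
    "Choose the models that have been trained/finetuned on this dataset",
    options=all_models,
    max_selections=5,
)

if st.button("Print Models"):
    st.write(models)
```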
+
+ st.markdown("### What two metrics do you want to compare?")
+
+ metrics = st.multiselect(
+     'Choose the metrics for the comparison',
+     options=['f1', 'accuracy', 'precision', 'recall'])
+
+ button2 = st.button("Print Metrics",disabled=False)
+
+ if button2 :
+     if len(metrics ) < 3:
+         st.write(metrics)
+     else:
+         st.warning("Please select at most 2 metrics")
+
+ st.markdown("### Now wait for the dataset and models to load (this can take some time if they are big!")
+
+ ### Loading data
+ try:
+     data = datasets.load_dataset(dset, split=dset_split)
+     st.text("Loaded the validation split of dataset "+ str(dset))
+ except:
+     data = datasets.load_dataset(dset, split="test")
+     st.text("Loaded the test split of dataset "+ str(dset))
+
+ st.text("Sorry, I can't load this dataset... try another one!")
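If the selected split can't be loaded, the bare `except:` immediately retries with `split="test"`, which will itself raise if that split doesn't exist, and nothing stops the rest of the app when loading fails. A more defensive sketch of the same step, still assuming `dset` and `dset_split` come from the selectors above:

```python
from datasets import load_dataset

data = None
for split in (dset_split, "test"):
    try:
        data = load_dataset(dset, split=split)
        st.text("Loaded the " + split + " split of dataset " + str(dset))
        break
    except Exception:
        continue  # try the next candidate split

if data is None:
    st.text("Sorry, I can't load this dataset... try another one!")
    st.stop()  # don't run the rest of the app without data
```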
+ ### Loading models
+
+ for i in range (len(models)):
+     try:
+         globals()[f"tokenizer_{i}"] = AutoTokenizer.from_pretrained(models[i])
+         globals()[f"model_{i}"] = AutoModelForSequenceClassification.from_pretrained(models[i])
+         st.text("Loaded model "+ str(models[i]))
+     except:
+         st.text("Sorry, I can't load model "+ str(models[i]))
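Writing `tokenizer_{i}` and `model_{i}` into `globals()` works, but it forces the later pipeline step to reconstruct those names. A sketch that keeps the loaded objects in a plain dict keyed by model id (the `loaded` name is introduced here for illustration):

```python
from transformers import AutoModelForSequenceClassification, AutoTokenizer

loaded = {}  # model id -> (tokenizer, model)
for model_id in models:
    try:
        loaded[model_id] = (
            AutoTokenizer.from_pretrained(model_id),
            AutoModelForSequenceClassification.from_pretrained(model_id),
        )
        st.text("Loaded model " + model_id)
    except Exception:
        st.text("Sorry, I can't load model " + model_id)
```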
+ ### Defining metrics
+ for i in range (len(metrics)):
+     try:
+         globals()[f"metrics[i]"] = evaluate.load(metrics[i])
+     except:
+         st.text("Sorry, I can't load metric "+ str(metrics[i]) +"... Try another one!")
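The key `f"metrics[i]"` contains no placeholder, so every pass of this loop overwrites the same global entry literally named `metrics[i]`. A sketch that collects the loaded metrics in a dict instead (`loaded_metrics` is a name introduced here for illustration):

```python
import evaluate

loaded_metrics = {}  # metric name -> EvaluationModule
for name in metrics:
    try:
        loaded_metrics[name] = evaluate.load(name)
    except Exception:
        st.text("Sorry, I can't load metric " + name + "... Try another one!")

# The selected metrics could also be bundled into a single object with evaluate.combine(list(loaded_metrics)).
```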
+ ### Defining Evaluator
+ eval = evaluator("text-classification")
  
+ ### Defining pipelines
  
+ st.markdown("### Help us pick the right labels for your models")
  
+ st.text("The labels for your dataset are: "+ str(data.features['label'].names))
  
+ """
+ for i in range (len(model_list)):
+     st.text("The labels for your dataset are: "+ str(data.features['label'].names))
+     print(model_list[i])
+     print(AutoConfig.from_pretrained(models[0]).id2label)
+
+ for i in range (len(models)):
+     try:
+         globals()[f"pipe1_{i}"] = AutoTokenizer.from_pretrained(models[i])
+         globals()[f"model_{i}"] = AutoModelForSequenceClassification.from_pretrained(models[i])
+         st.text("Loaded model "+ str(models[i]))
+     except:
+         st.text("Sorry, I can't load model "+ str(models[i]))
  
  pipe1 = pipeline("text-classification", model=model1, tokenizer= tokenizer1, device=0)
  res_accuracy1 = eval.compute(model_or_pipeline=pipe1, data=data, metric=accuracy,
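The `"""` opened above means the two loops that follow are part of a string literal rather than executable code, so the only live evaluation is still the hard-coded `pipe1` with a single metric. A sketch of how the selected models and metrics could be evaluated together, using the `eval` evaluator defined above and the hypothetical `loaded` / `loaded_metrics` dicts from the earlier sketches; `label_mapping` reconciles each model's own label names (e.g. `LABEL_0`) with the dataset's integer labels:

```python
from transformers import AutoConfig, pipeline

results = {}  # model id -> {metric name -> score dict}
for model_id, (tok, mdl) in loaded.items():
    pipe = pipeline("text-classification", model=mdl, tokenizer=tok, device=0)

    # Map the model's own label names onto the dataset's integer label ids.
    id2label = AutoConfig.from_pretrained(model_id).id2label
    label_mapping = {name: idx for idx, name in id2label.items()}

    results[model_id] = {
        name: eval.compute(
            model_or_pipeline=pipe,
            data=data,
            metric=metric,
            label_mapping=label_mapping,
        )
        for name, metric in loaded_metrics.items()
    }

st.write(results)
```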