mathemakitten committed on
Commit
6636a1f
·
1 Parent(s): 1d0896c

eval suite

Browse files
Files changed (2) hide show
  1. README.md +4 -4
  2. glue-evaluation-suite.py +137 -0
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Glue Evaluation Suite
3
- emoji: 💩
4
- colorFrom: blue
5
- colorTo: yellow
6
  sdk: static
7
  pinned: false
8
  ---
 
1
  ---
2
+ title: Glue Suite V2
3
+ emoji: 🐢
4
+ colorFrom: yellow
5
+ colorTo: purple
6
  sdk: static
7
  pinned: false
8
  ---
glue-evaluation-suite.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import evaluate
2
+ from evaluate.evaluation_suite import SubTask
3
+
4
+
5
+ class Suite(evaluate.EvaluationSuite):
6
+
7
+ def __init__(self, name):
8
+ super().__init__(name)
9
+ self.preprocessor = lambda x: {"text": x["text"].lower()}
10
+ self.suite = [
11
+ SubTask(
12
+ task_type="text-classification",
13
+ data="glue",
14
+ subset="cola",
15
+ split="test[:10]",
16
+ args_for_task={
17
+ "metric": "accuracy",
18
+ "input_column": "sentence",
19
+ "label_column": "label",
20
+ "label_mapping": {
21
+ "LABEL_0": 0.0,
22
+ "LABEL_1": 1.0
23
+ }
24
+ }
25
+ ),
26
+ SubTask(
27
+ task_type="text-classification",
28
+ data="glue",
29
+ subset="sst2",
30
+ split="validation[:10]",
31
+ args_for_task={
32
+ "metric": "accuracy",
33
+ "input_column": "sentence",
34
+ "label_column": "label",
35
+ "label_mapping": {
36
+ "LABEL_0": 0.0,
37
+ "LABEL_1": 1.0
38
+ }
39
+ }
40
+ ),
41
+ SubTask(
42
+ task_type="text-classification",
43
+ data="glue",
44
+ subset="qqp",
45
+ split="validation[:10]",
46
+ args_for_task={
47
+ "metric": "accuracy",
48
+ "input_column": "question1",
49
+ "second_input_column": "question2",
50
+ "label_column": "label",
51
+ "label_mapping": {
52
+ "LABEL_0": 0,
53
+ "LABEL_1": 1
54
+ }
55
+ }
56
+ ),
57
+ SubTask(
58
+ task_type="text-classification",
59
+ data="glue",
60
+ subset="mrpc",
61
+ split="validation[:10]",
62
+ args_for_task={
63
+ "metric": "accuracy",
64
+ "input_column": "sentence1",
65
+ "second_input_column": "sentence2",
66
+ "label_column": "label",
67
+ "label_mapping": {
68
+ "LABEL_0": 0,
69
+ "LABEL_1": 1
70
+ }
71
+ }
72
+ ),
73
+ SubTask(
74
+ task_type="text-classification",
75
+ data="glue",
76
+ subset="mnli",
77
+ split="validation_mismatched[:10]",
78
+ args_for_task={
79
+ "metric": "accuracy",
80
+ "input_column": "premise",
81
+ "second_input_column": "hypothesis",
82
+ "label_mapping": {
83
+ "LABEL_0": 0,
84
+ "LABEL_1": 1,
85
+ "LABEL_2": 2
86
+ }
87
+ }
88
+ ),
89
+ SubTask(
90
+ task_type="text-classification",
91
+ data="glue",
92
+ subset="qnli",
93
+ split="validation[:10]",
94
+ args_for_task={
95
+ "metric": "accuracy",
96
+ "input_column": "question",
97
+ "second_input_column": "sentence",
98
+ "label_column": "label",
99
+ "label_mapping": {
100
+ "LABEL_0": 0,
101
+ "LABEL_1": 1
102
+ }
103
+ }
104
+ ),
105
+ SubTask(
106
+ task_type="text-classification",
107
+ data="glue",
108
+ subset="rte",
109
+ split="validation[:10]",
110
+ args_for_task={
111
+ "metric": "accuracy",
112
+ "input_column": "sentence1",
113
+ "second_input_column": "sentence2",
114
+ "label_column": "label",
115
+ "label_mapping": {
116
+ "LABEL_0": 0,
117
+ "LABEL_1": 1
118
+ }
119
+ }
120
+ ),
121
+ SubTask(
122
+ task_type="text-classification",
123
+ data="glue",
124
+ subset="wnli",
125
+ split="validation[:10]",
126
+ args_for_task={
127
+ "metric": "accuracy",
128
+ "input_column": "sentence1",
129
+ "second_input_column": "sentence2",
130
+ "label_column": "label",
131
+ "label_mapping": {
132
+ "LABEL_0": 0,
133
+ "LABEL_1": 1
134
+ }
135
+ }
136
+ )
137
+ ]