bbunzeck commited on
Commit
314d211
1 Parent(s): 3dff9f7

Upload 8 files

Browse files
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "bos_token_id": 0,
6
+ "eos_token_id": 0,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 128,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 128,
11
+ "max_position_embeddings": 128,
12
+ "model_type": "llama",
13
+ "num_attention_heads": 8,
14
+ "num_hidden_layers": 8,
15
+ "num_key_value_heads": 8,
16
+ "pad_token_id": 1,
17
+ "pretraining_tp": 1,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_scaling": null,
20
+ "tie_word_embeddings": false,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.32.1",
23
+ "use_cache": true,
24
+ "vocab_size": 16000
25
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 0,
5
+ "pad_token_id": 1,
6
+ "transformers_version": "4.32.1"
7
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:600ec9802331fc7a9e68b938467c7ac4a02c57eebe4f15d2664ed7f9d3088708
3
+ size 20087695
results.json ADDED
@@ -0,0 +1,2247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "blimp": {
4
+ "acc,none": 0.6665074626865671,
5
+ "acc_stderr,none": 0.2001790545943022,
6
+ "alias": "blimp"
7
+ },
8
+ "blimp_adjunct_island": {
9
+ "acc,none": 0.668,
10
+ "acc_stderr,none": 0.014899597242811476,
11
+ "alias": " - blimp_adjunct_island"
12
+ },
13
+ "blimp_anaphor_gender_agreement": {
14
+ "acc,none": 0.67,
15
+ "acc_stderr,none": 0.014876872027456741,
16
+ "alias": " - blimp_anaphor_gender_agreement"
17
+ },
18
+ "blimp_anaphor_number_agreement": {
19
+ "acc,none": 0.863,
20
+ "acc_stderr,none": 0.01087884871433333,
21
+ "alias": " - blimp_anaphor_number_agreement"
22
+ },
23
+ "blimp_animate_subject_passive": {
24
+ "acc,none": 0.74,
25
+ "acc_stderr,none": 0.013877773329774166,
26
+ "alias": " - blimp_animate_subject_passive"
27
+ },
28
+ "blimp_animate_subject_trans": {
29
+ "acc,none": 0.692,
30
+ "acc_stderr,none": 0.01460648312734276,
31
+ "alias": " - blimp_animate_subject_trans"
32
+ },
33
+ "blimp_causative": {
34
+ "acc,none": 0.613,
35
+ "acc_stderr,none": 0.015410011955493935,
36
+ "alias": " - blimp_causative"
37
+ },
38
+ "blimp_complex_NP_island": {
39
+ "acc,none": 0.539,
40
+ "acc_stderr,none": 0.01577110420128319,
41
+ "alias": " - blimp_complex_NP_island"
42
+ },
43
+ "blimp_coordinate_structure_constraint_complex_left_branch": {
44
+ "acc,none": 0.378,
45
+ "acc_stderr,none": 0.015341165254026649,
46
+ "alias": " - blimp_coordinate_structure_constraint_complex_left_branch"
47
+ },
48
+ "blimp_coordinate_structure_constraint_object_extraction": {
49
+ "acc,none": 0.559,
50
+ "acc_stderr,none": 0.015708779894242676,
51
+ "alias": " - blimp_coordinate_structure_constraint_object_extraction"
52
+ },
53
+ "blimp_determiner_noun_agreement_1": {
54
+ "acc,none": 0.912,
55
+ "acc_stderr,none": 0.008963053962592086,
56
+ "alias": " - blimp_determiner_noun_agreement_1"
57
+ },
58
+ "blimp_determiner_noun_agreement_2": {
59
+ "acc,none": 0.932,
60
+ "acc_stderr,none": 0.007964887911291605,
61
+ "alias": " - blimp_determiner_noun_agreement_2"
62
+ },
63
+ "blimp_determiner_noun_agreement_irregular_1": {
64
+ "acc,none": 0.738,
65
+ "acc_stderr,none": 0.013912208651021345,
66
+ "alias": " - blimp_determiner_noun_agreement_irregular_1"
67
+ },
68
+ "blimp_determiner_noun_agreement_irregular_2": {
69
+ "acc,none": 0.849,
70
+ "acc_stderr,none": 0.011328165223341681,
71
+ "alias": " - blimp_determiner_noun_agreement_irregular_2"
72
+ },
73
+ "blimp_determiner_noun_agreement_with_adj_2": {
74
+ "acc,none": 0.885,
75
+ "acc_stderr,none": 0.010093407594904624,
76
+ "alias": " - blimp_determiner_noun_agreement_with_adj_2"
77
+ },
78
+ "blimp_determiner_noun_agreement_with_adj_irregular_1": {
79
+ "acc,none": 0.787,
80
+ "acc_stderr,none": 0.01295371756673723,
81
+ "alias": " - blimp_determiner_noun_agreement_with_adj_irregular_1"
82
+ },
83
+ "blimp_determiner_noun_agreement_with_adj_irregular_2": {
84
+ "acc,none": 0.826,
85
+ "acc_stderr,none": 0.011994493230973438,
86
+ "alias": " - blimp_determiner_noun_agreement_with_adj_irregular_2"
87
+ },
88
+ "blimp_determiner_noun_agreement_with_adjective_1": {
89
+ "acc,none": 0.888,
90
+ "acc_stderr,none": 0.00997775303139724,
91
+ "alias": " - blimp_determiner_noun_agreement_with_adjective_1"
92
+ },
93
+ "blimp_distractor_agreement_relational_noun": {
94
+ "acc,none": 0.446,
95
+ "acc_stderr,none": 0.015726771166750357,
96
+ "alias": " - blimp_distractor_agreement_relational_noun"
97
+ },
98
+ "blimp_distractor_agreement_relative_clause": {
99
+ "acc,none": 0.412,
100
+ "acc_stderr,none": 0.01557236329201509,
101
+ "alias": " - blimp_distractor_agreement_relative_clause"
102
+ },
103
+ "blimp_drop_argument": {
104
+ "acc,none": 0.69,
105
+ "acc_stderr,none": 0.014632638658632902,
106
+ "alias": " - blimp_drop_argument"
107
+ },
108
+ "blimp_ellipsis_n_bar_1": {
109
+ "acc,none": 0.599,
110
+ "acc_stderr,none": 0.015506109745498316,
111
+ "alias": " - blimp_ellipsis_n_bar_1"
112
+ },
113
+ "blimp_ellipsis_n_bar_2": {
114
+ "acc,none": 0.483,
115
+ "acc_stderr,none": 0.015810153729833434,
116
+ "alias": " - blimp_ellipsis_n_bar_2"
117
+ },
118
+ "blimp_existential_there_object_raising": {
119
+ "acc,none": 0.696,
120
+ "acc_stderr,none": 0.014553205687950427,
121
+ "alias": " - blimp_existential_there_object_raising"
122
+ },
123
+ "blimp_existential_there_quantifiers_1": {
124
+ "acc,none": 0.936,
125
+ "acc_stderr,none": 0.007743640226919291,
126
+ "alias": " - blimp_existential_there_quantifiers_1"
127
+ },
128
+ "blimp_existential_there_quantifiers_2": {
129
+ "acc,none": 0.488,
130
+ "acc_stderr,none": 0.015814743314581818,
131
+ "alias": " - blimp_existential_there_quantifiers_2"
132
+ },
133
+ "blimp_existential_there_subject_raising": {
134
+ "acc,none": 0.766,
135
+ "acc_stderr,none": 0.013394902889660006,
136
+ "alias": " - blimp_existential_there_subject_raising"
137
+ },
138
+ "blimp_expletive_it_object_raising": {
139
+ "acc,none": 0.661,
140
+ "acc_stderr,none": 0.014976758771620344,
141
+ "alias": " - blimp_expletive_it_object_raising"
142
+ },
143
+ "blimp_inchoative": {
144
+ "acc,none": 0.51,
145
+ "acc_stderr,none": 0.015816135752773203,
146
+ "alias": " - blimp_inchoative"
147
+ },
148
+ "blimp_intransitive": {
149
+ "acc,none": 0.648,
150
+ "acc_stderr,none": 0.015110404505648675,
151
+ "alias": " - blimp_intransitive"
152
+ },
153
+ "blimp_irregular_past_participle_adjectives": {
154
+ "acc,none": 0.811,
155
+ "acc_stderr,none": 0.012386784588117709,
156
+ "alias": " - blimp_irregular_past_participle_adjectives"
157
+ },
158
+ "blimp_irregular_past_participle_verbs": {
159
+ "acc,none": 0.819,
160
+ "acc_stderr,none": 0.01218143617917792,
161
+ "alias": " - blimp_irregular_past_participle_verbs"
162
+ },
163
+ "blimp_irregular_plural_subject_verb_agreement_1": {
164
+ "acc,none": 0.698,
165
+ "acc_stderr,none": 0.014526080235459543,
166
+ "alias": " - blimp_irregular_plural_subject_verb_agreement_1"
167
+ },
168
+ "blimp_irregular_plural_subject_verb_agreement_2": {
169
+ "acc,none": 0.779,
170
+ "acc_stderr,none": 0.01312750285969623,
171
+ "alias": " - blimp_irregular_plural_subject_verb_agreement_2"
172
+ },
173
+ "blimp_left_branch_island_echo_question": {
174
+ "acc,none": 0.542,
175
+ "acc_stderr,none": 0.015763390640483703,
176
+ "alias": " - blimp_left_branch_island_echo_question"
177
+ },
178
+ "blimp_left_branch_island_simple_question": {
179
+ "acc,none": 0.496,
180
+ "acc_stderr,none": 0.01581879370351089,
181
+ "alias": " - blimp_left_branch_island_simple_question"
182
+ },
183
+ "blimp_matrix_question_npi_licensor_present": {
184
+ "acc,none": 0.053,
185
+ "acc_stderr,none": 0.007088105617246444,
186
+ "alias": " - blimp_matrix_question_npi_licensor_present"
187
+ },
188
+ "blimp_npi_present_1": {
189
+ "acc,none": 0.327,
190
+ "acc_stderr,none": 0.014842213153411247,
191
+ "alias": " - blimp_npi_present_1"
192
+ },
193
+ "blimp_npi_present_2": {
194
+ "acc,none": 0.477,
195
+ "acc_stderr,none": 0.015802554246726098,
196
+ "alias": " - blimp_npi_present_2"
197
+ },
198
+ "blimp_only_npi_licensor_present": {
199
+ "acc,none": 0.805,
200
+ "acc_stderr,none": 0.012535235623319332,
201
+ "alias": " - blimp_only_npi_licensor_present"
202
+ },
203
+ "blimp_only_npi_scope": {
204
+ "acc,none": 0.569,
205
+ "acc_stderr,none": 0.015667944488173505,
206
+ "alias": " - blimp_only_npi_scope"
207
+ },
208
+ "blimp_passive_1": {
209
+ "acc,none": 0.832,
210
+ "acc_stderr,none": 0.011828605831454259,
211
+ "alias": " - blimp_passive_1"
212
+ },
213
+ "blimp_passive_2": {
214
+ "acc,none": 0.799,
215
+ "acc_stderr,none": 0.012679107214617331,
216
+ "alias": " - blimp_passive_2"
217
+ },
218
+ "blimp_principle_A_c_command": {
219
+ "acc,none": 0.573,
220
+ "acc_stderr,none": 0.015649789644462214,
221
+ "alias": " - blimp_principle_A_c_command"
222
+ },
223
+ "blimp_principle_A_case_1": {
224
+ "acc,none": 1.0,
225
+ "acc_stderr,none": 0.0,
226
+ "alias": " - blimp_principle_A_case_1"
227
+ },
228
+ "blimp_principle_A_case_2": {
229
+ "acc,none": 0.819,
230
+ "acc_stderr,none": 0.012181436179177912,
231
+ "alias": " - blimp_principle_A_case_2"
232
+ },
233
+ "blimp_principle_A_domain_1": {
234
+ "acc,none": 0.967,
235
+ "acc_stderr,none": 0.005651808820452374,
236
+ "alias": " - blimp_principle_A_domain_1"
237
+ },
238
+ "blimp_principle_A_domain_2": {
239
+ "acc,none": 0.66,
240
+ "acc_stderr,none": 0.014987482264363937,
241
+ "alias": " - blimp_principle_A_domain_2"
242
+ },
243
+ "blimp_principle_A_domain_3": {
244
+ "acc,none": 0.513,
245
+ "acc_stderr,none": 0.015813952101896626,
246
+ "alias": " - blimp_principle_A_domain_3"
247
+ },
248
+ "blimp_principle_A_reconstruction": {
249
+ "acc,none": 0.365,
250
+ "acc_stderr,none": 0.015231776226264891,
251
+ "alias": " - blimp_principle_A_reconstruction"
252
+ },
253
+ "blimp_regular_plural_subject_verb_agreement_1": {
254
+ "acc,none": 0.818,
255
+ "acc_stderr,none": 0.012207580637662164,
256
+ "alias": " - blimp_regular_plural_subject_verb_agreement_1"
257
+ },
258
+ "blimp_regular_plural_subject_verb_agreement_2": {
259
+ "acc,none": 0.693,
260
+ "acc_stderr,none": 0.01459328489285263,
261
+ "alias": " - blimp_regular_plural_subject_verb_agreement_2"
262
+ },
263
+ "blimp_sentential_negation_npi_licensor_present": {
264
+ "acc,none": 0.991,
265
+ "acc_stderr,none": 0.002987963843142665,
266
+ "alias": " - blimp_sentential_negation_npi_licensor_present"
267
+ },
268
+ "blimp_sentential_negation_npi_scope": {
269
+ "acc,none": 0.426,
270
+ "acc_stderr,none": 0.015645087688113814,
271
+ "alias": " - blimp_sentential_negation_npi_scope"
272
+ },
273
+ "blimp_sentential_subject_island": {
274
+ "acc,none": 0.451,
275
+ "acc_stderr,none": 0.015743152379585533,
276
+ "alias": " - blimp_sentential_subject_island"
277
+ },
278
+ "blimp_superlative_quantifiers_1": {
279
+ "acc,none": 0.834,
280
+ "acc_stderr,none": 0.011772110370812194,
281
+ "alias": " - blimp_superlative_quantifiers_1"
282
+ },
283
+ "blimp_superlative_quantifiers_2": {
284
+ "acc,none": 0.682,
285
+ "acc_stderr,none": 0.014734079309311901,
286
+ "alias": " - blimp_superlative_quantifiers_2"
287
+ },
288
+ "blimp_tough_vs_raising_1": {
289
+ "acc,none": 0.348,
290
+ "acc_stderr,none": 0.01507060460376841,
291
+ "alias": " - blimp_tough_vs_raising_1"
292
+ },
293
+ "blimp_tough_vs_raising_2": {
294
+ "acc,none": 0.806,
295
+ "acc_stderr,none": 0.012510816141264359,
296
+ "alias": " - blimp_tough_vs_raising_2"
297
+ },
298
+ "blimp_transitive": {
299
+ "acc,none": 0.721,
300
+ "acc_stderr,none": 0.014190150117612045,
301
+ "alias": " - blimp_transitive"
302
+ },
303
+ "blimp_wh_island": {
304
+ "acc,none": 0.498,
305
+ "acc_stderr,none": 0.015819173374302706,
306
+ "alias": " - blimp_wh_island"
307
+ },
308
+ "blimp_wh_questions_object_gap": {
309
+ "acc,none": 0.588,
310
+ "acc_stderr,none": 0.015572363292015095,
311
+ "alias": " - blimp_wh_questions_object_gap"
312
+ },
313
+ "blimp_wh_questions_subject_gap": {
314
+ "acc,none": 0.872,
315
+ "acc_stderr,none": 0.010570133761108663,
316
+ "alias": " - blimp_wh_questions_subject_gap"
317
+ },
318
+ "blimp_wh_questions_subject_gap_long_distance": {
319
+ "acc,none": 0.923,
320
+ "acc_stderr,none": 0.008434580140240648,
321
+ "alias": " - blimp_wh_questions_subject_gap_long_distance"
322
+ },
323
+ "blimp_wh_vs_that_no_gap": {
324
+ "acc,none": 0.92,
325
+ "acc_stderr,none": 0.00858333697775365,
326
+ "alias": " - blimp_wh_vs_that_no_gap"
327
+ },
328
+ "blimp_wh_vs_that_no_gap_long_distance": {
329
+ "acc,none": 0.962,
330
+ "acc_stderr,none": 0.0060491811505849384,
331
+ "alias": " - blimp_wh_vs_that_no_gap_long_distance"
332
+ },
333
+ "blimp_wh_vs_that_with_gap": {
334
+ "acc,none": 0.26,
335
+ "acc_stderr,none": 0.013877773329774164,
336
+ "alias": " - blimp_wh_vs_that_with_gap"
337
+ },
338
+ "blimp_wh_vs_that_with_gap_long_distance": {
339
+ "acc,none": 0.085,
340
+ "acc_stderr,none": 0.008823426366942288,
341
+ "alias": " - blimp_wh_vs_that_with_gap_long_distance"
342
+ }
343
+ },
344
+ "groups": {
345
+ "blimp": {
346
+ "acc,none": 0.6665074626865671,
347
+ "acc_stderr,none": 0.2001790545943022,
348
+ "alias": "blimp"
349
+ }
350
+ },
351
+ "configs": {
352
+ "blimp_adjunct_island": {
353
+ "task": "blimp_adjunct_island",
354
+ "group": "blimp",
355
+ "dataset_path": "blimp",
356
+ "dataset_name": "adjunct_island",
357
+ "validation_split": "train",
358
+ "doc_to_text": "",
359
+ "doc_to_target": 0,
360
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
361
+ "description": "",
362
+ "target_delimiter": " ",
363
+ "fewshot_delimiter": "\n\n",
364
+ "num_fewshot": 0,
365
+ "metric_list": [
366
+ {
367
+ "metric": "acc"
368
+ }
369
+ ],
370
+ "output_type": "multiple_choice",
371
+ "repeats": 1,
372
+ "should_decontaminate": true,
373
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
374
+ "metadata": {
375
+ "version": 1.0
376
+ }
377
+ },
378
+ "blimp_anaphor_gender_agreement": {
379
+ "task": "blimp_anaphor_gender_agreement",
380
+ "group": "blimp",
381
+ "dataset_path": "blimp",
382
+ "dataset_name": "anaphor_gender_agreement",
383
+ "validation_split": "train",
384
+ "doc_to_text": "",
385
+ "doc_to_target": 0,
386
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
387
+ "description": "",
388
+ "target_delimiter": " ",
389
+ "fewshot_delimiter": "\n\n",
390
+ "num_fewshot": 0,
391
+ "metric_list": [
392
+ {
393
+ "metric": "acc"
394
+ }
395
+ ],
396
+ "output_type": "multiple_choice",
397
+ "repeats": 1,
398
+ "should_decontaminate": true,
399
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
400
+ "metadata": {
401
+ "version": 1.0
402
+ }
403
+ },
404
+ "blimp_anaphor_number_agreement": {
405
+ "task": "blimp_anaphor_number_agreement",
406
+ "group": "blimp",
407
+ "dataset_path": "blimp",
408
+ "dataset_name": "anaphor_number_agreement",
409
+ "validation_split": "train",
410
+ "doc_to_text": "",
411
+ "doc_to_target": 0,
412
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
413
+ "description": "",
414
+ "target_delimiter": " ",
415
+ "fewshot_delimiter": "\n\n",
416
+ "num_fewshot": 0,
417
+ "metric_list": [
418
+ {
419
+ "metric": "acc"
420
+ }
421
+ ],
422
+ "output_type": "multiple_choice",
423
+ "repeats": 1,
424
+ "should_decontaminate": true,
425
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
426
+ "metadata": {
427
+ "version": 1.0
428
+ }
429
+ },
430
+ "blimp_animate_subject_passive": {
431
+ "task": "blimp_animate_subject_passive",
432
+ "group": "blimp",
433
+ "dataset_path": "blimp",
434
+ "dataset_name": "animate_subject_passive",
435
+ "validation_split": "train",
436
+ "doc_to_text": "",
437
+ "doc_to_target": 0,
438
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
439
+ "description": "",
440
+ "target_delimiter": " ",
441
+ "fewshot_delimiter": "\n\n",
442
+ "num_fewshot": 0,
443
+ "metric_list": [
444
+ {
445
+ "metric": "acc"
446
+ }
447
+ ],
448
+ "output_type": "multiple_choice",
449
+ "repeats": 1,
450
+ "should_decontaminate": true,
451
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
452
+ "metadata": {
453
+ "version": 1.0
454
+ }
455
+ },
456
+ "blimp_animate_subject_trans": {
457
+ "task": "blimp_animate_subject_trans",
458
+ "group": "blimp",
459
+ "dataset_path": "blimp",
460
+ "dataset_name": "animate_subject_trans",
461
+ "validation_split": "train",
462
+ "doc_to_text": "",
463
+ "doc_to_target": 0,
464
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
465
+ "description": "",
466
+ "target_delimiter": " ",
467
+ "fewshot_delimiter": "\n\n",
468
+ "num_fewshot": 0,
469
+ "metric_list": [
470
+ {
471
+ "metric": "acc"
472
+ }
473
+ ],
474
+ "output_type": "multiple_choice",
475
+ "repeats": 1,
476
+ "should_decontaminate": true,
477
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
478
+ "metadata": {
479
+ "version": 1.0
480
+ }
481
+ },
482
+ "blimp_causative": {
483
+ "task": "blimp_causative",
484
+ "group": "blimp",
485
+ "dataset_path": "blimp",
486
+ "dataset_name": "causative",
487
+ "validation_split": "train",
488
+ "doc_to_text": "",
489
+ "doc_to_target": 0,
490
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
491
+ "description": "",
492
+ "target_delimiter": " ",
493
+ "fewshot_delimiter": "\n\n",
494
+ "num_fewshot": 0,
495
+ "metric_list": [
496
+ {
497
+ "metric": "acc"
498
+ }
499
+ ],
500
+ "output_type": "multiple_choice",
501
+ "repeats": 1,
502
+ "should_decontaminate": true,
503
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
504
+ "metadata": {
505
+ "version": 1.0
506
+ }
507
+ },
508
+ "blimp_complex_NP_island": {
509
+ "task": "blimp_complex_NP_island",
510
+ "group": "blimp",
511
+ "dataset_path": "blimp",
512
+ "dataset_name": "complex_NP_island",
513
+ "validation_split": "train",
514
+ "doc_to_text": "",
515
+ "doc_to_target": 0,
516
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
517
+ "description": "",
518
+ "target_delimiter": " ",
519
+ "fewshot_delimiter": "\n\n",
520
+ "num_fewshot": 0,
521
+ "metric_list": [
522
+ {
523
+ "metric": "acc"
524
+ }
525
+ ],
526
+ "output_type": "multiple_choice",
527
+ "repeats": 1,
528
+ "should_decontaminate": true,
529
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
530
+ "metadata": {
531
+ "version": 1.0
532
+ }
533
+ },
534
+ "blimp_coordinate_structure_constraint_complex_left_branch": {
535
+ "task": "blimp_coordinate_structure_constraint_complex_left_branch",
536
+ "group": "blimp",
537
+ "dataset_path": "blimp",
538
+ "dataset_name": "coordinate_structure_constraint_complex_left_branch",
539
+ "validation_split": "train",
540
+ "doc_to_text": "",
541
+ "doc_to_target": 0,
542
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
543
+ "description": "",
544
+ "target_delimiter": " ",
545
+ "fewshot_delimiter": "\n\n",
546
+ "num_fewshot": 0,
547
+ "metric_list": [
548
+ {
549
+ "metric": "acc"
550
+ }
551
+ ],
552
+ "output_type": "multiple_choice",
553
+ "repeats": 1,
554
+ "should_decontaminate": true,
555
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
556
+ "metadata": {
557
+ "version": 1.0
558
+ }
559
+ },
560
+ "blimp_coordinate_structure_constraint_object_extraction": {
561
+ "task": "blimp_coordinate_structure_constraint_object_extraction",
562
+ "group": "blimp",
563
+ "dataset_path": "blimp",
564
+ "dataset_name": "coordinate_structure_constraint_object_extraction",
565
+ "validation_split": "train",
566
+ "doc_to_text": "",
567
+ "doc_to_target": 0,
568
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
569
+ "description": "",
570
+ "target_delimiter": " ",
571
+ "fewshot_delimiter": "\n\n",
572
+ "num_fewshot": 0,
573
+ "metric_list": [
574
+ {
575
+ "metric": "acc"
576
+ }
577
+ ],
578
+ "output_type": "multiple_choice",
579
+ "repeats": 1,
580
+ "should_decontaminate": true,
581
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
582
+ "metadata": {
583
+ "version": 1.0
584
+ }
585
+ },
586
+ "blimp_determiner_noun_agreement_1": {
587
+ "task": "blimp_determiner_noun_agreement_1",
588
+ "group": "blimp",
589
+ "dataset_path": "blimp",
590
+ "dataset_name": "determiner_noun_agreement_1",
591
+ "validation_split": "train",
592
+ "doc_to_text": "",
593
+ "doc_to_target": 0,
594
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
595
+ "description": "",
596
+ "target_delimiter": " ",
597
+ "fewshot_delimiter": "\n\n",
598
+ "num_fewshot": 0,
599
+ "metric_list": [
600
+ {
601
+ "metric": "acc"
602
+ }
603
+ ],
604
+ "output_type": "multiple_choice",
605
+ "repeats": 1,
606
+ "should_decontaminate": true,
607
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
608
+ "metadata": {
609
+ "version": 1.0
610
+ }
611
+ },
612
+ "blimp_determiner_noun_agreement_2": {
613
+ "task": "blimp_determiner_noun_agreement_2",
614
+ "group": "blimp",
615
+ "dataset_path": "blimp",
616
+ "dataset_name": "determiner_noun_agreement_2",
617
+ "validation_split": "train",
618
+ "doc_to_text": "",
619
+ "doc_to_target": 0,
620
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
621
+ "description": "",
622
+ "target_delimiter": " ",
623
+ "fewshot_delimiter": "\n\n",
624
+ "num_fewshot": 0,
625
+ "metric_list": [
626
+ {
627
+ "metric": "acc"
628
+ }
629
+ ],
630
+ "output_type": "multiple_choice",
631
+ "repeats": 1,
632
+ "should_decontaminate": true,
633
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
634
+ "metadata": {
635
+ "version": 1.0
636
+ }
637
+ },
638
+ "blimp_determiner_noun_agreement_irregular_1": {
639
+ "task": "blimp_determiner_noun_agreement_irregular_1",
640
+ "group": "blimp",
641
+ "dataset_path": "blimp",
642
+ "dataset_name": "determiner_noun_agreement_irregular_1",
643
+ "validation_split": "train",
644
+ "doc_to_text": "",
645
+ "doc_to_target": 0,
646
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
647
+ "description": "",
648
+ "target_delimiter": " ",
649
+ "fewshot_delimiter": "\n\n",
650
+ "num_fewshot": 0,
651
+ "metric_list": [
652
+ {
653
+ "metric": "acc"
654
+ }
655
+ ],
656
+ "output_type": "multiple_choice",
657
+ "repeats": 1,
658
+ "should_decontaminate": true,
659
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
660
+ "metadata": {
661
+ "version": 1.0
662
+ }
663
+ },
664
+ "blimp_determiner_noun_agreement_irregular_2": {
665
+ "task": "blimp_determiner_noun_agreement_irregular_2",
666
+ "group": "blimp",
667
+ "dataset_path": "blimp",
668
+ "dataset_name": "determiner_noun_agreement_irregular_2",
669
+ "validation_split": "train",
670
+ "doc_to_text": "",
671
+ "doc_to_target": 0,
672
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
673
+ "description": "",
674
+ "target_delimiter": " ",
675
+ "fewshot_delimiter": "\n\n",
676
+ "num_fewshot": 0,
677
+ "metric_list": [
678
+ {
679
+ "metric": "acc"
680
+ }
681
+ ],
682
+ "output_type": "multiple_choice",
683
+ "repeats": 1,
684
+ "should_decontaminate": true,
685
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
686
+ "metadata": {
687
+ "version": 1.0
688
+ }
689
+ },
690
+ "blimp_determiner_noun_agreement_with_adj_2": {
691
+ "task": "blimp_determiner_noun_agreement_with_adj_2",
692
+ "group": "blimp",
693
+ "dataset_path": "blimp",
694
+ "dataset_name": "determiner_noun_agreement_with_adj_2",
695
+ "validation_split": "train",
696
+ "doc_to_text": "",
697
+ "doc_to_target": 0,
698
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
699
+ "description": "",
700
+ "target_delimiter": " ",
701
+ "fewshot_delimiter": "\n\n",
702
+ "num_fewshot": 0,
703
+ "metric_list": [
704
+ {
705
+ "metric": "acc"
706
+ }
707
+ ],
708
+ "output_type": "multiple_choice",
709
+ "repeats": 1,
710
+ "should_decontaminate": true,
711
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
712
+ "metadata": {
713
+ "version": 1.0
714
+ }
715
+ },
716
+ "blimp_determiner_noun_agreement_with_adj_irregular_1": {
717
+ "task": "blimp_determiner_noun_agreement_with_adj_irregular_1",
718
+ "group": "blimp",
719
+ "dataset_path": "blimp",
720
+ "dataset_name": "determiner_noun_agreement_with_adj_irregular_1",
721
+ "validation_split": "train",
722
+ "doc_to_text": "",
723
+ "doc_to_target": 0,
724
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
725
+ "description": "",
726
+ "target_delimiter": " ",
727
+ "fewshot_delimiter": "\n\n",
728
+ "num_fewshot": 0,
729
+ "metric_list": [
730
+ {
731
+ "metric": "acc"
732
+ }
733
+ ],
734
+ "output_type": "multiple_choice",
735
+ "repeats": 1,
736
+ "should_decontaminate": true,
737
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
738
+ "metadata": {
739
+ "version": 1.0
740
+ }
741
+ },
742
+ "blimp_determiner_noun_agreement_with_adj_irregular_2": {
743
+ "task": "blimp_determiner_noun_agreement_with_adj_irregular_2",
744
+ "group": "blimp",
745
+ "dataset_path": "blimp",
746
+ "dataset_name": "determiner_noun_agreement_with_adj_irregular_2",
747
+ "validation_split": "train",
748
+ "doc_to_text": "",
749
+ "doc_to_target": 0,
750
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
751
+ "description": "",
752
+ "target_delimiter": " ",
753
+ "fewshot_delimiter": "\n\n",
754
+ "num_fewshot": 0,
755
+ "metric_list": [
756
+ {
757
+ "metric": "acc"
758
+ }
759
+ ],
760
+ "output_type": "multiple_choice",
761
+ "repeats": 1,
762
+ "should_decontaminate": true,
763
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
764
+ "metadata": {
765
+ "version": 1.0
766
+ }
767
+ },
768
+ "blimp_determiner_noun_agreement_with_adjective_1": {
769
+ "task": "blimp_determiner_noun_agreement_with_adjective_1",
770
+ "group": "blimp",
771
+ "dataset_path": "blimp",
772
+ "dataset_name": "determiner_noun_agreement_with_adjective_1",
773
+ "validation_split": "train",
774
+ "doc_to_text": "",
775
+ "doc_to_target": 0,
776
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
777
+ "description": "",
778
+ "target_delimiter": " ",
779
+ "fewshot_delimiter": "\n\n",
780
+ "num_fewshot": 0,
781
+ "metric_list": [
782
+ {
783
+ "metric": "acc"
784
+ }
785
+ ],
786
+ "output_type": "multiple_choice",
787
+ "repeats": 1,
788
+ "should_decontaminate": true,
789
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
790
+ "metadata": {
791
+ "version": 1.0
792
+ }
793
+ },
794
+ "blimp_distractor_agreement_relational_noun": {
795
+ "task": "blimp_distractor_agreement_relational_noun",
796
+ "group": "blimp",
797
+ "dataset_path": "blimp",
798
+ "dataset_name": "distractor_agreement_relational_noun",
799
+ "validation_split": "train",
800
+ "doc_to_text": "",
801
+ "doc_to_target": 0,
802
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
803
+ "description": "",
804
+ "target_delimiter": " ",
805
+ "fewshot_delimiter": "\n\n",
806
+ "num_fewshot": 0,
807
+ "metric_list": [
808
+ {
809
+ "metric": "acc"
810
+ }
811
+ ],
812
+ "output_type": "multiple_choice",
813
+ "repeats": 1,
814
+ "should_decontaminate": true,
815
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
816
+ "metadata": {
817
+ "version": 1.0
818
+ }
819
+ },
820
+ "blimp_distractor_agreement_relative_clause": {
821
+ "task": "blimp_distractor_agreement_relative_clause",
822
+ "group": "blimp",
823
+ "dataset_path": "blimp",
824
+ "dataset_name": "distractor_agreement_relative_clause",
825
+ "validation_split": "train",
826
+ "doc_to_text": "",
827
+ "doc_to_target": 0,
828
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
829
+ "description": "",
830
+ "target_delimiter": " ",
831
+ "fewshot_delimiter": "\n\n",
832
+ "num_fewshot": 0,
833
+ "metric_list": [
834
+ {
835
+ "metric": "acc"
836
+ }
837
+ ],
838
+ "output_type": "multiple_choice",
839
+ "repeats": 1,
840
+ "should_decontaminate": true,
841
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
842
+ "metadata": {
843
+ "version": 1.0
844
+ }
845
+ },
846
+ "blimp_drop_argument": {
847
+ "task": "blimp_drop_argument",
848
+ "group": "blimp",
849
+ "dataset_path": "blimp",
850
+ "dataset_name": "drop_argument",
851
+ "validation_split": "train",
852
+ "doc_to_text": "",
853
+ "doc_to_target": 0,
854
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
855
+ "description": "",
856
+ "target_delimiter": " ",
857
+ "fewshot_delimiter": "\n\n",
858
+ "num_fewshot": 0,
859
+ "metric_list": [
860
+ {
861
+ "metric": "acc"
862
+ }
863
+ ],
864
+ "output_type": "multiple_choice",
865
+ "repeats": 1,
866
+ "should_decontaminate": true,
867
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
868
+ "metadata": {
869
+ "version": 1.0
870
+ }
871
+ },
872
+ "blimp_ellipsis_n_bar_1": {
873
+ "task": "blimp_ellipsis_n_bar_1",
874
+ "group": "blimp",
875
+ "dataset_path": "blimp",
876
+ "dataset_name": "ellipsis_n_bar_1",
877
+ "validation_split": "train",
878
+ "doc_to_text": "",
879
+ "doc_to_target": 0,
880
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
881
+ "description": "",
882
+ "target_delimiter": " ",
883
+ "fewshot_delimiter": "\n\n",
884
+ "num_fewshot": 0,
885
+ "metric_list": [
886
+ {
887
+ "metric": "acc"
888
+ }
889
+ ],
890
+ "output_type": "multiple_choice",
891
+ "repeats": 1,
892
+ "should_decontaminate": true,
893
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
894
+ "metadata": {
895
+ "version": 1.0
896
+ }
897
+ },
898
+ "blimp_ellipsis_n_bar_2": {
899
+ "task": "blimp_ellipsis_n_bar_2",
900
+ "group": "blimp",
901
+ "dataset_path": "blimp",
902
+ "dataset_name": "ellipsis_n_bar_2",
903
+ "validation_split": "train",
904
+ "doc_to_text": "",
905
+ "doc_to_target": 0,
906
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
907
+ "description": "",
908
+ "target_delimiter": " ",
909
+ "fewshot_delimiter": "\n\n",
910
+ "num_fewshot": 0,
911
+ "metric_list": [
912
+ {
913
+ "metric": "acc"
914
+ }
915
+ ],
916
+ "output_type": "multiple_choice",
917
+ "repeats": 1,
918
+ "should_decontaminate": true,
919
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
920
+ "metadata": {
921
+ "version": 1.0
922
+ }
923
+ },
924
+ "blimp_existential_there_object_raising": {
925
+ "task": "blimp_existential_there_object_raising",
926
+ "group": "blimp",
927
+ "dataset_path": "blimp",
928
+ "dataset_name": "existential_there_object_raising",
929
+ "validation_split": "train",
930
+ "doc_to_text": "",
931
+ "doc_to_target": 0,
932
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
933
+ "description": "",
934
+ "target_delimiter": " ",
935
+ "fewshot_delimiter": "\n\n",
936
+ "num_fewshot": 0,
937
+ "metric_list": [
938
+ {
939
+ "metric": "acc"
940
+ }
941
+ ],
942
+ "output_type": "multiple_choice",
943
+ "repeats": 1,
944
+ "should_decontaminate": true,
945
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
946
+ "metadata": {
947
+ "version": 1.0
948
+ }
949
+ },
950
+ "blimp_existential_there_quantifiers_1": {
951
+ "task": "blimp_existential_there_quantifiers_1",
952
+ "group": "blimp",
953
+ "dataset_path": "blimp",
954
+ "dataset_name": "existential_there_quantifiers_1",
955
+ "validation_split": "train",
956
+ "doc_to_text": "",
957
+ "doc_to_target": 0,
958
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
959
+ "description": "",
960
+ "target_delimiter": " ",
961
+ "fewshot_delimiter": "\n\n",
962
+ "num_fewshot": 0,
963
+ "metric_list": [
964
+ {
965
+ "metric": "acc"
966
+ }
967
+ ],
968
+ "output_type": "multiple_choice",
969
+ "repeats": 1,
970
+ "should_decontaminate": true,
971
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
972
+ "metadata": {
973
+ "version": 1.0
974
+ }
975
+ },
976
+ "blimp_existential_there_quantifiers_2": {
977
+ "task": "blimp_existential_there_quantifiers_2",
978
+ "group": "blimp",
979
+ "dataset_path": "blimp",
980
+ "dataset_name": "existential_there_quantifiers_2",
981
+ "validation_split": "train",
982
+ "doc_to_text": "",
983
+ "doc_to_target": 0,
984
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
985
+ "description": "",
986
+ "target_delimiter": " ",
987
+ "fewshot_delimiter": "\n\n",
988
+ "num_fewshot": 0,
989
+ "metric_list": [
990
+ {
991
+ "metric": "acc"
992
+ }
993
+ ],
994
+ "output_type": "multiple_choice",
995
+ "repeats": 1,
996
+ "should_decontaminate": true,
997
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
998
+ "metadata": {
999
+ "version": 1.0
1000
+ }
1001
+ },
1002
+ "blimp_existential_there_subject_raising": {
1003
+ "task": "blimp_existential_there_subject_raising",
1004
+ "group": "blimp",
1005
+ "dataset_path": "blimp",
1006
+ "dataset_name": "existential_there_subject_raising",
1007
+ "validation_split": "train",
1008
+ "doc_to_text": "",
1009
+ "doc_to_target": 0,
1010
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1011
+ "description": "",
1012
+ "target_delimiter": " ",
1013
+ "fewshot_delimiter": "\n\n",
1014
+ "num_fewshot": 0,
1015
+ "metric_list": [
1016
+ {
1017
+ "metric": "acc"
1018
+ }
1019
+ ],
1020
+ "output_type": "multiple_choice",
1021
+ "repeats": 1,
1022
+ "should_decontaminate": true,
1023
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1024
+ "metadata": {
1025
+ "version": 1.0
1026
+ }
1027
+ },
1028
+ "blimp_expletive_it_object_raising": {
1029
+ "task": "blimp_expletive_it_object_raising",
1030
+ "group": "blimp",
1031
+ "dataset_path": "blimp",
1032
+ "dataset_name": "expletive_it_object_raising",
1033
+ "validation_split": "train",
1034
+ "doc_to_text": "",
1035
+ "doc_to_target": 0,
1036
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1037
+ "description": "",
1038
+ "target_delimiter": " ",
1039
+ "fewshot_delimiter": "\n\n",
1040
+ "num_fewshot": 0,
1041
+ "metric_list": [
1042
+ {
1043
+ "metric": "acc"
1044
+ }
1045
+ ],
1046
+ "output_type": "multiple_choice",
1047
+ "repeats": 1,
1048
+ "should_decontaminate": true,
1049
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1050
+ "metadata": {
1051
+ "version": 1.0
1052
+ }
1053
+ },
1054
+ "blimp_inchoative": {
1055
+ "task": "blimp_inchoative",
1056
+ "group": "blimp",
1057
+ "dataset_path": "blimp",
1058
+ "dataset_name": "inchoative",
1059
+ "validation_split": "train",
1060
+ "doc_to_text": "",
1061
+ "doc_to_target": 0,
1062
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1063
+ "description": "",
1064
+ "target_delimiter": " ",
1065
+ "fewshot_delimiter": "\n\n",
1066
+ "num_fewshot": 0,
1067
+ "metric_list": [
1068
+ {
1069
+ "metric": "acc"
1070
+ }
1071
+ ],
1072
+ "output_type": "multiple_choice",
1073
+ "repeats": 1,
1074
+ "should_decontaminate": true,
1075
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1076
+ "metadata": {
1077
+ "version": 1.0
1078
+ }
1079
+ },
1080
+ "blimp_intransitive": {
1081
+ "task": "blimp_intransitive",
1082
+ "group": "blimp",
1083
+ "dataset_path": "blimp",
1084
+ "dataset_name": "intransitive",
1085
+ "validation_split": "train",
1086
+ "doc_to_text": "",
1087
+ "doc_to_target": 0,
1088
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1089
+ "description": "",
1090
+ "target_delimiter": " ",
1091
+ "fewshot_delimiter": "\n\n",
1092
+ "num_fewshot": 0,
1093
+ "metric_list": [
1094
+ {
1095
+ "metric": "acc"
1096
+ }
1097
+ ],
1098
+ "output_type": "multiple_choice",
1099
+ "repeats": 1,
1100
+ "should_decontaminate": true,
1101
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1102
+ "metadata": {
1103
+ "version": 1.0
1104
+ }
1105
+ },
1106
+ "blimp_irregular_past_participle_adjectives": {
1107
+ "task": "blimp_irregular_past_participle_adjectives",
1108
+ "group": "blimp",
1109
+ "dataset_path": "blimp",
1110
+ "dataset_name": "irregular_past_participle_adjectives",
1111
+ "validation_split": "train",
1112
+ "doc_to_text": "",
1113
+ "doc_to_target": 0,
1114
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1115
+ "description": "",
1116
+ "target_delimiter": " ",
1117
+ "fewshot_delimiter": "\n\n",
1118
+ "num_fewshot": 0,
1119
+ "metric_list": [
1120
+ {
1121
+ "metric": "acc"
1122
+ }
1123
+ ],
1124
+ "output_type": "multiple_choice",
1125
+ "repeats": 1,
1126
+ "should_decontaminate": true,
1127
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1128
+ "metadata": {
1129
+ "version": 1.0
1130
+ }
1131
+ },
1132
+ "blimp_irregular_past_participle_verbs": {
1133
+ "task": "blimp_irregular_past_participle_verbs",
1134
+ "group": "blimp",
1135
+ "dataset_path": "blimp",
1136
+ "dataset_name": "irregular_past_participle_verbs",
1137
+ "validation_split": "train",
1138
+ "doc_to_text": "",
1139
+ "doc_to_target": 0,
1140
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1141
+ "description": "",
1142
+ "target_delimiter": " ",
1143
+ "fewshot_delimiter": "\n\n",
1144
+ "num_fewshot": 0,
1145
+ "metric_list": [
1146
+ {
1147
+ "metric": "acc"
1148
+ }
1149
+ ],
1150
+ "output_type": "multiple_choice",
1151
+ "repeats": 1,
1152
+ "should_decontaminate": true,
1153
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1154
+ "metadata": {
1155
+ "version": 1.0
1156
+ }
1157
+ },
1158
+ "blimp_irregular_plural_subject_verb_agreement_1": {
1159
+ "task": "blimp_irregular_plural_subject_verb_agreement_1",
1160
+ "group": "blimp",
1161
+ "dataset_path": "blimp",
1162
+ "dataset_name": "irregular_plural_subject_verb_agreement_1",
1163
+ "validation_split": "train",
1164
+ "doc_to_text": "",
1165
+ "doc_to_target": 0,
1166
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1167
+ "description": "",
1168
+ "target_delimiter": " ",
1169
+ "fewshot_delimiter": "\n\n",
1170
+ "num_fewshot": 0,
1171
+ "metric_list": [
1172
+ {
1173
+ "metric": "acc"
1174
+ }
1175
+ ],
1176
+ "output_type": "multiple_choice",
1177
+ "repeats": 1,
1178
+ "should_decontaminate": true,
1179
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1180
+ "metadata": {
1181
+ "version": 1.0
1182
+ }
1183
+ },
1184
+ "blimp_irregular_plural_subject_verb_agreement_2": {
1185
+ "task": "blimp_irregular_plural_subject_verb_agreement_2",
1186
+ "group": "blimp",
1187
+ "dataset_path": "blimp",
1188
+ "dataset_name": "irregular_plural_subject_verb_agreement_2",
1189
+ "validation_split": "train",
1190
+ "doc_to_text": "",
1191
+ "doc_to_target": 0,
1192
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1193
+ "description": "",
1194
+ "target_delimiter": " ",
1195
+ "fewshot_delimiter": "\n\n",
1196
+ "num_fewshot": 0,
1197
+ "metric_list": [
1198
+ {
1199
+ "metric": "acc"
1200
+ }
1201
+ ],
1202
+ "output_type": "multiple_choice",
1203
+ "repeats": 1,
1204
+ "should_decontaminate": true,
1205
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1206
+ "metadata": {
1207
+ "version": 1.0
1208
+ }
1209
+ },
1210
+ "blimp_left_branch_island_echo_question": {
1211
+ "task": "blimp_left_branch_island_echo_question",
1212
+ "group": "blimp",
1213
+ "dataset_path": "blimp",
1214
+ "dataset_name": "left_branch_island_echo_question",
1215
+ "validation_split": "train",
1216
+ "doc_to_text": "",
1217
+ "doc_to_target": 0,
1218
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1219
+ "description": "",
1220
+ "target_delimiter": " ",
1221
+ "fewshot_delimiter": "\n\n",
1222
+ "num_fewshot": 0,
1223
+ "metric_list": [
1224
+ {
1225
+ "metric": "acc"
1226
+ }
1227
+ ],
1228
+ "output_type": "multiple_choice",
1229
+ "repeats": 1,
1230
+ "should_decontaminate": true,
1231
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1232
+ "metadata": {
1233
+ "version": 1.0
1234
+ }
1235
+ },
1236
+ "blimp_left_branch_island_simple_question": {
1237
+ "task": "blimp_left_branch_island_simple_question",
1238
+ "group": "blimp",
1239
+ "dataset_path": "blimp",
1240
+ "dataset_name": "left_branch_island_simple_question",
1241
+ "validation_split": "train",
1242
+ "doc_to_text": "",
1243
+ "doc_to_target": 0,
1244
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1245
+ "description": "",
1246
+ "target_delimiter": " ",
1247
+ "fewshot_delimiter": "\n\n",
1248
+ "num_fewshot": 0,
1249
+ "metric_list": [
1250
+ {
1251
+ "metric": "acc"
1252
+ }
1253
+ ],
1254
+ "output_type": "multiple_choice",
1255
+ "repeats": 1,
1256
+ "should_decontaminate": true,
1257
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1258
+ "metadata": {
1259
+ "version": 1.0
1260
+ }
1261
+ },
1262
+ "blimp_matrix_question_npi_licensor_present": {
1263
+ "task": "blimp_matrix_question_npi_licensor_present",
1264
+ "group": "blimp",
1265
+ "dataset_path": "blimp",
1266
+ "dataset_name": "matrix_question_npi_licensor_present",
1267
+ "validation_split": "train",
1268
+ "doc_to_text": "",
1269
+ "doc_to_target": 0,
1270
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1271
+ "description": "",
1272
+ "target_delimiter": " ",
1273
+ "fewshot_delimiter": "\n\n",
1274
+ "num_fewshot": 0,
1275
+ "metric_list": [
1276
+ {
1277
+ "metric": "acc"
1278
+ }
1279
+ ],
1280
+ "output_type": "multiple_choice",
1281
+ "repeats": 1,
1282
+ "should_decontaminate": true,
1283
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1284
+ "metadata": {
1285
+ "version": 1.0
1286
+ }
1287
+ },
1288
+ "blimp_npi_present_1": {
1289
+ "task": "blimp_npi_present_1",
1290
+ "group": "blimp",
1291
+ "dataset_path": "blimp",
1292
+ "dataset_name": "npi_present_1",
1293
+ "validation_split": "train",
1294
+ "doc_to_text": "",
1295
+ "doc_to_target": 0,
1296
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1297
+ "description": "",
1298
+ "target_delimiter": " ",
1299
+ "fewshot_delimiter": "\n\n",
1300
+ "num_fewshot": 0,
1301
+ "metric_list": [
1302
+ {
1303
+ "metric": "acc"
1304
+ }
1305
+ ],
1306
+ "output_type": "multiple_choice",
1307
+ "repeats": 1,
1308
+ "should_decontaminate": true,
1309
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1310
+ "metadata": {
1311
+ "version": 1.0
1312
+ }
1313
+ },
1314
+ "blimp_npi_present_2": {
1315
+ "task": "blimp_npi_present_2",
1316
+ "group": "blimp",
1317
+ "dataset_path": "blimp",
1318
+ "dataset_name": "npi_present_2",
1319
+ "validation_split": "train",
1320
+ "doc_to_text": "",
1321
+ "doc_to_target": 0,
1322
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1323
+ "description": "",
1324
+ "target_delimiter": " ",
1325
+ "fewshot_delimiter": "\n\n",
1326
+ "num_fewshot": 0,
1327
+ "metric_list": [
1328
+ {
1329
+ "metric": "acc"
1330
+ }
1331
+ ],
1332
+ "output_type": "multiple_choice",
1333
+ "repeats": 1,
1334
+ "should_decontaminate": true,
1335
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1336
+ "metadata": {
1337
+ "version": 1.0
1338
+ }
1339
+ },
1340
+ "blimp_only_npi_licensor_present": {
1341
+ "task": "blimp_only_npi_licensor_present",
1342
+ "group": "blimp",
1343
+ "dataset_path": "blimp",
1344
+ "dataset_name": "only_npi_licensor_present",
1345
+ "validation_split": "train",
1346
+ "doc_to_text": "",
1347
+ "doc_to_target": 0,
1348
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1349
+ "description": "",
1350
+ "target_delimiter": " ",
1351
+ "fewshot_delimiter": "\n\n",
1352
+ "num_fewshot": 0,
1353
+ "metric_list": [
1354
+ {
1355
+ "metric": "acc"
1356
+ }
1357
+ ],
1358
+ "output_type": "multiple_choice",
1359
+ "repeats": 1,
1360
+ "should_decontaminate": true,
1361
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1362
+ "metadata": {
1363
+ "version": 1.0
1364
+ }
1365
+ },
1366
+ "blimp_only_npi_scope": {
1367
+ "task": "blimp_only_npi_scope",
1368
+ "group": "blimp",
1369
+ "dataset_path": "blimp",
1370
+ "dataset_name": "only_npi_scope",
1371
+ "validation_split": "train",
1372
+ "doc_to_text": "",
1373
+ "doc_to_target": 0,
1374
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1375
+ "description": "",
1376
+ "target_delimiter": " ",
1377
+ "fewshot_delimiter": "\n\n",
1378
+ "num_fewshot": 0,
1379
+ "metric_list": [
1380
+ {
1381
+ "metric": "acc"
1382
+ }
1383
+ ],
1384
+ "output_type": "multiple_choice",
1385
+ "repeats": 1,
1386
+ "should_decontaminate": true,
1387
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1388
+ "metadata": {
1389
+ "version": 1.0
1390
+ }
1391
+ },
1392
+ "blimp_passive_1": {
1393
+ "task": "blimp_passive_1",
1394
+ "group": "blimp",
1395
+ "dataset_path": "blimp",
1396
+ "dataset_name": "passive_1",
1397
+ "validation_split": "train",
1398
+ "doc_to_text": "",
1399
+ "doc_to_target": 0,
1400
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1401
+ "description": "",
1402
+ "target_delimiter": " ",
1403
+ "fewshot_delimiter": "\n\n",
1404
+ "num_fewshot": 0,
1405
+ "metric_list": [
1406
+ {
1407
+ "metric": "acc"
1408
+ }
1409
+ ],
1410
+ "output_type": "multiple_choice",
1411
+ "repeats": 1,
1412
+ "should_decontaminate": true,
1413
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1414
+ "metadata": {
1415
+ "version": 1.0
1416
+ }
1417
+ },
1418
+ "blimp_passive_2": {
1419
+ "task": "blimp_passive_2",
1420
+ "group": "blimp",
1421
+ "dataset_path": "blimp",
1422
+ "dataset_name": "passive_2",
1423
+ "validation_split": "train",
1424
+ "doc_to_text": "",
1425
+ "doc_to_target": 0,
1426
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1427
+ "description": "",
1428
+ "target_delimiter": " ",
1429
+ "fewshot_delimiter": "\n\n",
1430
+ "num_fewshot": 0,
1431
+ "metric_list": [
1432
+ {
1433
+ "metric": "acc"
1434
+ }
1435
+ ],
1436
+ "output_type": "multiple_choice",
1437
+ "repeats": 1,
1438
+ "should_decontaminate": true,
1439
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1440
+ "metadata": {
1441
+ "version": 1.0
1442
+ }
1443
+ },
1444
+ "blimp_principle_A_c_command": {
1445
+ "task": "blimp_principle_A_c_command",
1446
+ "group": "blimp",
1447
+ "dataset_path": "blimp",
1448
+ "dataset_name": "principle_A_c_command",
1449
+ "validation_split": "train",
1450
+ "doc_to_text": "",
1451
+ "doc_to_target": 0,
1452
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1453
+ "description": "",
1454
+ "target_delimiter": " ",
1455
+ "fewshot_delimiter": "\n\n",
1456
+ "num_fewshot": 0,
1457
+ "metric_list": [
1458
+ {
1459
+ "metric": "acc"
1460
+ }
1461
+ ],
1462
+ "output_type": "multiple_choice",
1463
+ "repeats": 1,
1464
+ "should_decontaminate": true,
1465
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1466
+ "metadata": {
1467
+ "version": 1.0
1468
+ }
1469
+ },
1470
+ "blimp_principle_A_case_1": {
1471
+ "task": "blimp_principle_A_case_1",
1472
+ "group": "blimp",
1473
+ "dataset_path": "blimp",
1474
+ "dataset_name": "principle_A_case_1",
1475
+ "validation_split": "train",
1476
+ "doc_to_text": "",
1477
+ "doc_to_target": 0,
1478
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1479
+ "description": "",
1480
+ "target_delimiter": " ",
1481
+ "fewshot_delimiter": "\n\n",
1482
+ "num_fewshot": 0,
1483
+ "metric_list": [
1484
+ {
1485
+ "metric": "acc"
1486
+ }
1487
+ ],
1488
+ "output_type": "multiple_choice",
1489
+ "repeats": 1,
1490
+ "should_decontaminate": true,
1491
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1492
+ "metadata": {
1493
+ "version": 1.0
1494
+ }
1495
+ },
1496
+ "blimp_principle_A_case_2": {
1497
+ "task": "blimp_principle_A_case_2",
1498
+ "group": "blimp",
1499
+ "dataset_path": "blimp",
1500
+ "dataset_name": "principle_A_case_2",
1501
+ "validation_split": "train",
1502
+ "doc_to_text": "",
1503
+ "doc_to_target": 0,
1504
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1505
+ "description": "",
1506
+ "target_delimiter": " ",
1507
+ "fewshot_delimiter": "\n\n",
1508
+ "num_fewshot": 0,
1509
+ "metric_list": [
1510
+ {
1511
+ "metric": "acc"
1512
+ }
1513
+ ],
1514
+ "output_type": "multiple_choice",
1515
+ "repeats": 1,
1516
+ "should_decontaminate": true,
1517
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1518
+ "metadata": {
1519
+ "version": 1.0
1520
+ }
1521
+ },
1522
+ "blimp_principle_A_domain_1": {
1523
+ "task": "blimp_principle_A_domain_1",
1524
+ "group": "blimp",
1525
+ "dataset_path": "blimp",
1526
+ "dataset_name": "principle_A_domain_1",
1527
+ "validation_split": "train",
1528
+ "doc_to_text": "",
1529
+ "doc_to_target": 0,
1530
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1531
+ "description": "",
1532
+ "target_delimiter": " ",
1533
+ "fewshot_delimiter": "\n\n",
1534
+ "num_fewshot": 0,
1535
+ "metric_list": [
1536
+ {
1537
+ "metric": "acc"
1538
+ }
1539
+ ],
1540
+ "output_type": "multiple_choice",
1541
+ "repeats": 1,
1542
+ "should_decontaminate": true,
1543
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1544
+ "metadata": {
1545
+ "version": 1.0
1546
+ }
1547
+ },
1548
+ "blimp_principle_A_domain_2": {
1549
+ "task": "blimp_principle_A_domain_2",
1550
+ "group": "blimp",
1551
+ "dataset_path": "blimp",
1552
+ "dataset_name": "principle_A_domain_2",
1553
+ "validation_split": "train",
1554
+ "doc_to_text": "",
1555
+ "doc_to_target": 0,
1556
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1557
+ "description": "",
1558
+ "target_delimiter": " ",
1559
+ "fewshot_delimiter": "\n\n",
1560
+ "num_fewshot": 0,
1561
+ "metric_list": [
1562
+ {
1563
+ "metric": "acc"
1564
+ }
1565
+ ],
1566
+ "output_type": "multiple_choice",
1567
+ "repeats": 1,
1568
+ "should_decontaminate": true,
1569
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1570
+ "metadata": {
1571
+ "version": 1.0
1572
+ }
1573
+ },
1574
+ "blimp_principle_A_domain_3": {
1575
+ "task": "blimp_principle_A_domain_3",
1576
+ "group": "blimp",
1577
+ "dataset_path": "blimp",
1578
+ "dataset_name": "principle_A_domain_3",
1579
+ "validation_split": "train",
1580
+ "doc_to_text": "",
1581
+ "doc_to_target": 0,
1582
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1583
+ "description": "",
1584
+ "target_delimiter": " ",
1585
+ "fewshot_delimiter": "\n\n",
1586
+ "num_fewshot": 0,
1587
+ "metric_list": [
1588
+ {
1589
+ "metric": "acc"
1590
+ }
1591
+ ],
1592
+ "output_type": "multiple_choice",
1593
+ "repeats": 1,
1594
+ "should_decontaminate": true,
1595
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1596
+ "metadata": {
1597
+ "version": 1.0
1598
+ }
1599
+ },
1600
+ "blimp_principle_A_reconstruction": {
1601
+ "task": "blimp_principle_A_reconstruction",
1602
+ "group": "blimp",
1603
+ "dataset_path": "blimp",
1604
+ "dataset_name": "principle_A_reconstruction",
1605
+ "validation_split": "train",
1606
+ "doc_to_text": "",
1607
+ "doc_to_target": 0,
1608
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1609
+ "description": "",
1610
+ "target_delimiter": " ",
1611
+ "fewshot_delimiter": "\n\n",
1612
+ "num_fewshot": 0,
1613
+ "metric_list": [
1614
+ {
1615
+ "metric": "acc"
1616
+ }
1617
+ ],
1618
+ "output_type": "multiple_choice",
1619
+ "repeats": 1,
1620
+ "should_decontaminate": true,
1621
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1622
+ "metadata": {
1623
+ "version": 1.0
1624
+ }
1625
+ },
1626
+ "blimp_regular_plural_subject_verb_agreement_1": {
1627
+ "task": "blimp_regular_plural_subject_verb_agreement_1",
1628
+ "group": "blimp",
1629
+ "dataset_path": "blimp",
1630
+ "dataset_name": "regular_plural_subject_verb_agreement_1",
1631
+ "validation_split": "train",
1632
+ "doc_to_text": "",
1633
+ "doc_to_target": 0,
1634
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1635
+ "description": "",
1636
+ "target_delimiter": " ",
1637
+ "fewshot_delimiter": "\n\n",
1638
+ "num_fewshot": 0,
1639
+ "metric_list": [
1640
+ {
1641
+ "metric": "acc"
1642
+ }
1643
+ ],
1644
+ "output_type": "multiple_choice",
1645
+ "repeats": 1,
1646
+ "should_decontaminate": true,
1647
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1648
+ "metadata": {
1649
+ "version": 1.0
1650
+ }
1651
+ },
1652
+ "blimp_regular_plural_subject_verb_agreement_2": {
1653
+ "task": "blimp_regular_plural_subject_verb_agreement_2",
1654
+ "group": "blimp",
1655
+ "dataset_path": "blimp",
1656
+ "dataset_name": "regular_plural_subject_verb_agreement_2",
1657
+ "validation_split": "train",
1658
+ "doc_to_text": "",
1659
+ "doc_to_target": 0,
1660
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1661
+ "description": "",
1662
+ "target_delimiter": " ",
1663
+ "fewshot_delimiter": "\n\n",
1664
+ "num_fewshot": 0,
1665
+ "metric_list": [
1666
+ {
1667
+ "metric": "acc"
1668
+ }
1669
+ ],
1670
+ "output_type": "multiple_choice",
1671
+ "repeats": 1,
1672
+ "should_decontaminate": true,
1673
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1674
+ "metadata": {
1675
+ "version": 1.0
1676
+ }
1677
+ },
1678
+ "blimp_sentential_negation_npi_licensor_present": {
1679
+ "task": "blimp_sentential_negation_npi_licensor_present",
1680
+ "group": "blimp",
1681
+ "dataset_path": "blimp",
1682
+ "dataset_name": "sentential_negation_npi_licensor_present",
1683
+ "validation_split": "train",
1684
+ "doc_to_text": "",
1685
+ "doc_to_target": 0,
1686
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1687
+ "description": "",
1688
+ "target_delimiter": " ",
1689
+ "fewshot_delimiter": "\n\n",
1690
+ "num_fewshot": 0,
1691
+ "metric_list": [
1692
+ {
1693
+ "metric": "acc"
1694
+ }
1695
+ ],
1696
+ "output_type": "multiple_choice",
1697
+ "repeats": 1,
1698
+ "should_decontaminate": true,
1699
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1700
+ "metadata": {
1701
+ "version": 1.0
1702
+ }
1703
+ },
1704
+ "blimp_sentential_negation_npi_scope": {
1705
+ "task": "blimp_sentential_negation_npi_scope",
1706
+ "group": "blimp",
1707
+ "dataset_path": "blimp",
1708
+ "dataset_name": "sentential_negation_npi_scope",
1709
+ "validation_split": "train",
1710
+ "doc_to_text": "",
1711
+ "doc_to_target": 0,
1712
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1713
+ "description": "",
1714
+ "target_delimiter": " ",
1715
+ "fewshot_delimiter": "\n\n",
1716
+ "num_fewshot": 0,
1717
+ "metric_list": [
1718
+ {
1719
+ "metric": "acc"
1720
+ }
1721
+ ],
1722
+ "output_type": "multiple_choice",
1723
+ "repeats": 1,
1724
+ "should_decontaminate": true,
1725
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1726
+ "metadata": {
1727
+ "version": 1.0
1728
+ }
1729
+ },
1730
+ "blimp_sentential_subject_island": {
1731
+ "task": "blimp_sentential_subject_island",
1732
+ "group": "blimp",
1733
+ "dataset_path": "blimp",
1734
+ "dataset_name": "sentential_subject_island",
1735
+ "validation_split": "train",
1736
+ "doc_to_text": "",
1737
+ "doc_to_target": 0,
1738
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1739
+ "description": "",
1740
+ "target_delimiter": " ",
1741
+ "fewshot_delimiter": "\n\n",
1742
+ "num_fewshot": 0,
1743
+ "metric_list": [
1744
+ {
1745
+ "metric": "acc"
1746
+ }
1747
+ ],
1748
+ "output_type": "multiple_choice",
1749
+ "repeats": 1,
1750
+ "should_decontaminate": true,
1751
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1752
+ "metadata": {
1753
+ "version": 1.0
1754
+ }
1755
+ },
1756
+ "blimp_superlative_quantifiers_1": {
1757
+ "task": "blimp_superlative_quantifiers_1",
1758
+ "group": "blimp",
1759
+ "dataset_path": "blimp",
1760
+ "dataset_name": "superlative_quantifiers_1",
1761
+ "validation_split": "train",
1762
+ "doc_to_text": "",
1763
+ "doc_to_target": 0,
1764
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1765
+ "description": "",
1766
+ "target_delimiter": " ",
1767
+ "fewshot_delimiter": "\n\n",
1768
+ "num_fewshot": 0,
1769
+ "metric_list": [
1770
+ {
1771
+ "metric": "acc"
1772
+ }
1773
+ ],
1774
+ "output_type": "multiple_choice",
1775
+ "repeats": 1,
1776
+ "should_decontaminate": true,
1777
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1778
+ "metadata": {
1779
+ "version": 1.0
1780
+ }
1781
+ },
1782
+ "blimp_superlative_quantifiers_2": {
1783
+ "task": "blimp_superlative_quantifiers_2",
1784
+ "group": "blimp",
1785
+ "dataset_path": "blimp",
1786
+ "dataset_name": "superlative_quantifiers_2",
1787
+ "validation_split": "train",
1788
+ "doc_to_text": "",
1789
+ "doc_to_target": 0,
1790
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1791
+ "description": "",
1792
+ "target_delimiter": " ",
1793
+ "fewshot_delimiter": "\n\n",
1794
+ "num_fewshot": 0,
1795
+ "metric_list": [
1796
+ {
1797
+ "metric": "acc"
1798
+ }
1799
+ ],
1800
+ "output_type": "multiple_choice",
1801
+ "repeats": 1,
1802
+ "should_decontaminate": true,
1803
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1804
+ "metadata": {
1805
+ "version": 1.0
1806
+ }
1807
+ },
1808
+ "blimp_tough_vs_raising_1": {
1809
+ "task": "blimp_tough_vs_raising_1",
1810
+ "group": "blimp",
1811
+ "dataset_path": "blimp",
1812
+ "dataset_name": "tough_vs_raising_1",
1813
+ "validation_split": "train",
1814
+ "doc_to_text": "",
1815
+ "doc_to_target": 0,
1816
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1817
+ "description": "",
1818
+ "target_delimiter": " ",
1819
+ "fewshot_delimiter": "\n\n",
1820
+ "num_fewshot": 0,
1821
+ "metric_list": [
1822
+ {
1823
+ "metric": "acc"
1824
+ }
1825
+ ],
1826
+ "output_type": "multiple_choice",
1827
+ "repeats": 1,
1828
+ "should_decontaminate": true,
1829
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1830
+ "metadata": {
1831
+ "version": 1.0
1832
+ }
1833
+ },
1834
+ "blimp_tough_vs_raising_2": {
1835
+ "task": "blimp_tough_vs_raising_2",
1836
+ "group": "blimp",
1837
+ "dataset_path": "blimp",
1838
+ "dataset_name": "tough_vs_raising_2",
1839
+ "validation_split": "train",
1840
+ "doc_to_text": "",
1841
+ "doc_to_target": 0,
1842
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1843
+ "description": "",
1844
+ "target_delimiter": " ",
1845
+ "fewshot_delimiter": "\n\n",
1846
+ "num_fewshot": 0,
1847
+ "metric_list": [
1848
+ {
1849
+ "metric": "acc"
1850
+ }
1851
+ ],
1852
+ "output_type": "multiple_choice",
1853
+ "repeats": 1,
1854
+ "should_decontaminate": true,
1855
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1856
+ "metadata": {
1857
+ "version": 1.0
1858
+ }
1859
+ },
1860
+ "blimp_transitive": {
1861
+ "task": "blimp_transitive",
1862
+ "group": "blimp",
1863
+ "dataset_path": "blimp",
1864
+ "dataset_name": "transitive",
1865
+ "validation_split": "train",
1866
+ "doc_to_text": "",
1867
+ "doc_to_target": 0,
1868
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1869
+ "description": "",
1870
+ "target_delimiter": " ",
1871
+ "fewshot_delimiter": "\n\n",
1872
+ "num_fewshot": 0,
1873
+ "metric_list": [
1874
+ {
1875
+ "metric": "acc"
1876
+ }
1877
+ ],
1878
+ "output_type": "multiple_choice",
1879
+ "repeats": 1,
1880
+ "should_decontaminate": true,
1881
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1882
+ "metadata": {
1883
+ "version": 1.0
1884
+ }
1885
+ },
1886
+ "blimp_wh_island": {
1887
+ "task": "blimp_wh_island",
1888
+ "group": "blimp",
1889
+ "dataset_path": "blimp",
1890
+ "dataset_name": "wh_island",
1891
+ "validation_split": "train",
1892
+ "doc_to_text": "",
1893
+ "doc_to_target": 0,
1894
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1895
+ "description": "",
1896
+ "target_delimiter": " ",
1897
+ "fewshot_delimiter": "\n\n",
1898
+ "num_fewshot": 0,
1899
+ "metric_list": [
1900
+ {
1901
+ "metric": "acc"
1902
+ }
1903
+ ],
1904
+ "output_type": "multiple_choice",
1905
+ "repeats": 1,
1906
+ "should_decontaminate": true,
1907
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1908
+ "metadata": {
1909
+ "version": 1.0
1910
+ }
1911
+ },
1912
+ "blimp_wh_questions_object_gap": {
1913
+ "task": "blimp_wh_questions_object_gap",
1914
+ "group": "blimp",
1915
+ "dataset_path": "blimp",
1916
+ "dataset_name": "wh_questions_object_gap",
1917
+ "validation_split": "train",
1918
+ "doc_to_text": "",
1919
+ "doc_to_target": 0,
1920
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1921
+ "description": "",
1922
+ "target_delimiter": " ",
1923
+ "fewshot_delimiter": "\n\n",
1924
+ "num_fewshot": 0,
1925
+ "metric_list": [
1926
+ {
1927
+ "metric": "acc"
1928
+ }
1929
+ ],
1930
+ "output_type": "multiple_choice",
1931
+ "repeats": 1,
1932
+ "should_decontaminate": true,
1933
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1934
+ "metadata": {
1935
+ "version": 1.0
1936
+ }
1937
+ },
1938
+ "blimp_wh_questions_subject_gap": {
1939
+ "task": "blimp_wh_questions_subject_gap",
1940
+ "group": "blimp",
1941
+ "dataset_path": "blimp",
1942
+ "dataset_name": "wh_questions_subject_gap",
1943
+ "validation_split": "train",
1944
+ "doc_to_text": "",
1945
+ "doc_to_target": 0,
1946
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1947
+ "description": "",
1948
+ "target_delimiter": " ",
1949
+ "fewshot_delimiter": "\n\n",
1950
+ "num_fewshot": 0,
1951
+ "metric_list": [
1952
+ {
1953
+ "metric": "acc"
1954
+ }
1955
+ ],
1956
+ "output_type": "multiple_choice",
1957
+ "repeats": 1,
1958
+ "should_decontaminate": true,
1959
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1960
+ "metadata": {
1961
+ "version": 1.0
1962
+ }
1963
+ },
1964
+ "blimp_wh_questions_subject_gap_long_distance": {
1965
+ "task": "blimp_wh_questions_subject_gap_long_distance",
1966
+ "group": "blimp",
1967
+ "dataset_path": "blimp",
1968
+ "dataset_name": "wh_questions_subject_gap_long_distance",
1969
+ "validation_split": "train",
1970
+ "doc_to_text": "",
1971
+ "doc_to_target": 0,
1972
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1973
+ "description": "",
1974
+ "target_delimiter": " ",
1975
+ "fewshot_delimiter": "\n\n",
1976
+ "num_fewshot": 0,
1977
+ "metric_list": [
1978
+ {
1979
+ "metric": "acc"
1980
+ }
1981
+ ],
1982
+ "output_type": "multiple_choice",
1983
+ "repeats": 1,
1984
+ "should_decontaminate": true,
1985
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
1986
+ "metadata": {
1987
+ "version": 1.0
1988
+ }
1989
+ },
1990
+ "blimp_wh_vs_that_no_gap": {
1991
+ "task": "blimp_wh_vs_that_no_gap",
1992
+ "group": "blimp",
1993
+ "dataset_path": "blimp",
1994
+ "dataset_name": "wh_vs_that_no_gap",
1995
+ "validation_split": "train",
1996
+ "doc_to_text": "",
1997
+ "doc_to_target": 0,
1998
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
1999
+ "description": "",
2000
+ "target_delimiter": " ",
2001
+ "fewshot_delimiter": "\n\n",
2002
+ "num_fewshot": 0,
2003
+ "metric_list": [
2004
+ {
2005
+ "metric": "acc"
2006
+ }
2007
+ ],
2008
+ "output_type": "multiple_choice",
2009
+ "repeats": 1,
2010
+ "should_decontaminate": true,
2011
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
2012
+ "metadata": {
2013
+ "version": 1.0
2014
+ }
2015
+ },
2016
+ "blimp_wh_vs_that_no_gap_long_distance": {
2017
+ "task": "blimp_wh_vs_that_no_gap_long_distance",
2018
+ "group": "blimp",
2019
+ "dataset_path": "blimp",
2020
+ "dataset_name": "wh_vs_that_no_gap_long_distance",
2021
+ "validation_split": "train",
2022
+ "doc_to_text": "",
2023
+ "doc_to_target": 0,
2024
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
2025
+ "description": "",
2026
+ "target_delimiter": " ",
2027
+ "fewshot_delimiter": "\n\n",
2028
+ "num_fewshot": 0,
2029
+ "metric_list": [
2030
+ {
2031
+ "metric": "acc"
2032
+ }
2033
+ ],
2034
+ "output_type": "multiple_choice",
2035
+ "repeats": 1,
2036
+ "should_decontaminate": true,
2037
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
2038
+ "metadata": {
2039
+ "version": 1.0
2040
+ }
2041
+ },
2042
+ "blimp_wh_vs_that_with_gap": {
2043
+ "task": "blimp_wh_vs_that_with_gap",
2044
+ "group": "blimp",
2045
+ "dataset_path": "blimp",
2046
+ "dataset_name": "wh_vs_that_with_gap",
2047
+ "validation_split": "train",
2048
+ "doc_to_text": "",
2049
+ "doc_to_target": 0,
2050
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
2051
+ "description": "",
2052
+ "target_delimiter": " ",
2053
+ "fewshot_delimiter": "\n\n",
2054
+ "num_fewshot": 0,
2055
+ "metric_list": [
2056
+ {
2057
+ "metric": "acc"
2058
+ }
2059
+ ],
2060
+ "output_type": "multiple_choice",
2061
+ "repeats": 1,
2062
+ "should_decontaminate": true,
2063
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
2064
+ "metadata": {
2065
+ "version": 1.0
2066
+ }
2067
+ },
2068
+ "blimp_wh_vs_that_with_gap_long_distance": {
2069
+ "task": "blimp_wh_vs_that_with_gap_long_distance",
2070
+ "group": "blimp",
2071
+ "dataset_path": "blimp",
2072
+ "dataset_name": "wh_vs_that_with_gap_long_distance",
2073
+ "validation_split": "train",
2074
+ "doc_to_text": "",
2075
+ "doc_to_target": 0,
2076
+ "doc_to_choice": "{{[sentence_good, sentence_bad]}}",
2077
+ "description": "",
2078
+ "target_delimiter": " ",
2079
+ "fewshot_delimiter": "\n\n",
2080
+ "num_fewshot": 0,
2081
+ "metric_list": [
2082
+ {
2083
+ "metric": "acc"
2084
+ }
2085
+ ],
2086
+ "output_type": "multiple_choice",
2087
+ "repeats": 1,
2088
+ "should_decontaminate": true,
2089
+ "doc_to_decontamination_query": "{{sentence_good}} {{sentence_bad}}",
2090
+ "metadata": {
2091
+ "version": 1.0
2092
+ }
2093
+ }
2094
+ },
2095
+ "versions": {
2096
+ "blimp": "N/A",
2097
+ "blimp_adjunct_island": 1.0,
2098
+ "blimp_anaphor_gender_agreement": 1.0,
2099
+ "blimp_anaphor_number_agreement": 1.0,
2100
+ "blimp_animate_subject_passive": 1.0,
2101
+ "blimp_animate_subject_trans": 1.0,
2102
+ "blimp_causative": 1.0,
2103
+ "blimp_complex_NP_island": 1.0,
2104
+ "blimp_coordinate_structure_constraint_complex_left_branch": 1.0,
2105
+ "blimp_coordinate_structure_constraint_object_extraction": 1.0,
2106
+ "blimp_determiner_noun_agreement_1": 1.0,
2107
+ "blimp_determiner_noun_agreement_2": 1.0,
2108
+ "blimp_determiner_noun_agreement_irregular_1": 1.0,
2109
+ "blimp_determiner_noun_agreement_irregular_2": 1.0,
2110
+ "blimp_determiner_noun_agreement_with_adj_2": 1.0,
2111
+ "blimp_determiner_noun_agreement_with_adj_irregular_1": 1.0,
2112
+ "blimp_determiner_noun_agreement_with_adj_irregular_2": 1.0,
2113
+ "blimp_determiner_noun_agreement_with_adjective_1": 1.0,
2114
+ "blimp_distractor_agreement_relational_noun": 1.0,
2115
+ "blimp_distractor_agreement_relative_clause": 1.0,
2116
+ "blimp_drop_argument": 1.0,
2117
+ "blimp_ellipsis_n_bar_1": 1.0,
2118
+ "blimp_ellipsis_n_bar_2": 1.0,
2119
+ "blimp_existential_there_object_raising": 1.0,
2120
+ "blimp_existential_there_quantifiers_1": 1.0,
2121
+ "blimp_existential_there_quantifiers_2": 1.0,
2122
+ "blimp_existential_there_subject_raising": 1.0,
2123
+ "blimp_expletive_it_object_raising": 1.0,
2124
+ "blimp_inchoative": 1.0,
2125
+ "blimp_intransitive": 1.0,
2126
+ "blimp_irregular_past_participle_adjectives": 1.0,
2127
+ "blimp_irregular_past_participle_verbs": 1.0,
2128
+ "blimp_irregular_plural_subject_verb_agreement_1": 1.0,
2129
+ "blimp_irregular_plural_subject_verb_agreement_2": 1.0,
2130
+ "blimp_left_branch_island_echo_question": 1.0,
2131
+ "blimp_left_branch_island_simple_question": 1.0,
2132
+ "blimp_matrix_question_npi_licensor_present": 1.0,
2133
+ "blimp_npi_present_1": 1.0,
2134
+ "blimp_npi_present_2": 1.0,
2135
+ "blimp_only_npi_licensor_present": 1.0,
2136
+ "blimp_only_npi_scope": 1.0,
2137
+ "blimp_passive_1": 1.0,
2138
+ "blimp_passive_2": 1.0,
2139
+ "blimp_principle_A_c_command": 1.0,
2140
+ "blimp_principle_A_case_1": 1.0,
2141
+ "blimp_principle_A_case_2": 1.0,
2142
+ "blimp_principle_A_domain_1": 1.0,
2143
+ "blimp_principle_A_domain_2": 1.0,
2144
+ "blimp_principle_A_domain_3": 1.0,
2145
+ "blimp_principle_A_reconstruction": 1.0,
2146
+ "blimp_regular_plural_subject_verb_agreement_1": 1.0,
2147
+ "blimp_regular_plural_subject_verb_agreement_2": 1.0,
2148
+ "blimp_sentential_negation_npi_licensor_present": 1.0,
2149
+ "blimp_sentential_negation_npi_scope": 1.0,
2150
+ "blimp_sentential_subject_island": 1.0,
2151
+ "blimp_superlative_quantifiers_1": 1.0,
2152
+ "blimp_superlative_quantifiers_2": 1.0,
2153
+ "blimp_tough_vs_raising_1": 1.0,
2154
+ "blimp_tough_vs_raising_2": 1.0,
2155
+ "blimp_transitive": 1.0,
2156
+ "blimp_wh_island": 1.0,
2157
+ "blimp_wh_questions_object_gap": 1.0,
2158
+ "blimp_wh_questions_subject_gap": 1.0,
2159
+ "blimp_wh_questions_subject_gap_long_distance": 1.0,
2160
+ "blimp_wh_vs_that_no_gap": 1.0,
2161
+ "blimp_wh_vs_that_no_gap_long_distance": 1.0,
2162
+ "blimp_wh_vs_that_with_gap": 1.0,
2163
+ "blimp_wh_vs_that_with_gap_long_distance": 1.0
2164
+ },
2165
+ "n-shot": {
2166
+ "blimp": 0,
2167
+ "blimp_adjunct_island": 0,
2168
+ "blimp_anaphor_gender_agreement": 0,
2169
+ "blimp_anaphor_number_agreement": 0,
2170
+ "blimp_animate_subject_passive": 0,
2171
+ "blimp_animate_subject_trans": 0,
2172
+ "blimp_causative": 0,
2173
+ "blimp_complex_NP_island": 0,
2174
+ "blimp_coordinate_structure_constraint_complex_left_branch": 0,
2175
+ "blimp_coordinate_structure_constraint_object_extraction": 0,
2176
+ "blimp_determiner_noun_agreement_1": 0,
2177
+ "blimp_determiner_noun_agreement_2": 0,
2178
+ "blimp_determiner_noun_agreement_irregular_1": 0,
2179
+ "blimp_determiner_noun_agreement_irregular_2": 0,
2180
+ "blimp_determiner_noun_agreement_with_adj_2": 0,
2181
+ "blimp_determiner_noun_agreement_with_adj_irregular_1": 0,
2182
+ "blimp_determiner_noun_agreement_with_adj_irregular_2": 0,
2183
+ "blimp_determiner_noun_agreement_with_adjective_1": 0,
2184
+ "blimp_distractor_agreement_relational_noun": 0,
2185
+ "blimp_distractor_agreement_relative_clause": 0,
2186
+ "blimp_drop_argument": 0,
2187
+ "blimp_ellipsis_n_bar_1": 0,
2188
+ "blimp_ellipsis_n_bar_2": 0,
2189
+ "blimp_existential_there_object_raising": 0,
2190
+ "blimp_existential_there_quantifiers_1": 0,
2191
+ "blimp_existential_there_quantifiers_2": 0,
2192
+ "blimp_existential_there_subject_raising": 0,
2193
+ "blimp_expletive_it_object_raising": 0,
2194
+ "blimp_inchoative": 0,
2195
+ "blimp_intransitive": 0,
2196
+ "blimp_irregular_past_participle_adjectives": 0,
2197
+ "blimp_irregular_past_participle_verbs": 0,
2198
+ "blimp_irregular_plural_subject_verb_agreement_1": 0,
2199
+ "blimp_irregular_plural_subject_verb_agreement_2": 0,
2200
+ "blimp_left_branch_island_echo_question": 0,
2201
+ "blimp_left_branch_island_simple_question": 0,
2202
+ "blimp_matrix_question_npi_licensor_present": 0,
2203
+ "blimp_npi_present_1": 0,
2204
+ "blimp_npi_present_2": 0,
2205
+ "blimp_only_npi_licensor_present": 0,
2206
+ "blimp_only_npi_scope": 0,
2207
+ "blimp_passive_1": 0,
2208
+ "blimp_passive_2": 0,
2209
+ "blimp_principle_A_c_command": 0,
2210
+ "blimp_principle_A_case_1": 0,
2211
+ "blimp_principle_A_case_2": 0,
2212
+ "blimp_principle_A_domain_1": 0,
2213
+ "blimp_principle_A_domain_2": 0,
2214
+ "blimp_principle_A_domain_3": 0,
2215
+ "blimp_principle_A_reconstruction": 0,
2216
+ "blimp_regular_plural_subject_verb_agreement_1": 0,
2217
+ "blimp_regular_plural_subject_verb_agreement_2": 0,
2218
+ "blimp_sentential_negation_npi_licensor_present": 0,
2219
+ "blimp_sentential_negation_npi_scope": 0,
2220
+ "blimp_sentential_subject_island": 0,
2221
+ "blimp_superlative_quantifiers_1": 0,
2222
+ "blimp_superlative_quantifiers_2": 0,
2223
+ "blimp_tough_vs_raising_1": 0,
2224
+ "blimp_tough_vs_raising_2": 0,
2225
+ "blimp_transitive": 0,
2226
+ "blimp_wh_island": 0,
2227
+ "blimp_wh_questions_object_gap": 0,
2228
+ "blimp_wh_questions_subject_gap": 0,
2229
+ "blimp_wh_questions_subject_gap_long_distance": 0,
2230
+ "blimp_wh_vs_that_no_gap": 0,
2231
+ "blimp_wh_vs_that_no_gap_long_distance": 0,
2232
+ "blimp_wh_vs_that_with_gap": 0,
2233
+ "blimp_wh_vs_that_with_gap_long_distance": 0
2234
+ },
2235
+ "config": {
2236
+ "model": "hf",
2237
+ "model_args": "pretrained=/home/bastian/Dokumente/baby_llamas/models/final_20",
2238
+ "batch_size": 1,
2239
+ "batch_sizes": [],
2240
+ "device": "cuda",
2241
+ "use_cache": null,
2242
+ "limit": null,
2243
+ "bootstrap_iters": 100000,
2244
+ "gen_kwargs": null
2245
+ },
2246
+ "git_hash": null
2247
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<pad>"
5
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "clean_up_tokenization_spaces": true,
4
+ "eos_token": "<|endoftext|>",
5
+ "model_max_length": 1000000000000000019884624838656,
6
+ "pad_token": "<pad>",
7
+ "tokenizer_class": "PreTrainedTokenizerFast"
8
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4ed56affee867809a1f3c1a26f8cc48e4c93e78e6accaf0951a246d171a4a29
3
+ size 4536