Files changed (1)
  1. README.md +106 -0
README.md CHANGED
@@ -115,6 +115,98 @@ model-index:
     source:
       url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k
       name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: IFEval (0-Shot)
+      type: HuggingFaceH4/ifeval
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: inst_level_strict_acc and prompt_level_strict_acc
+      value: 54.93
+      name: strict accuracy
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: BBH (3-Shot)
+      type: BBH
+      args:
+        num_few_shot: 3
+    metrics:
+    - type: acc_norm
+      value: 24.54
+      name: normalized accuracy
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MATH Lvl 5 (4-Shot)
+      type: hendrycks/competition_math
+      args:
+        num_few_shot: 4
+    metrics:
+    - type: exact_match
+      value: 9.52
+      name: exact match
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: GPQA (0-shot)
+      type: Idavidrein/gpqa
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: acc_norm
+      value: 7.27
+      name: acc_norm
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MuSR (0-shot)
+      type: TAUR-Lab/MuSR
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: acc_norm
+      value: 5.28
+      name: acc_norm
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU-PRO (5-shot)
+      type: TIGER-Lab/MMLU-Pro
+      config: main
+      split: test
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: acc
+      value: 31.16
+      name: accuracy
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k
+      name: Open LLM Leaderboard
 ---
 
 
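The hunk above is the standard model-index frontmatter that the leaderboard bot appends to a card: one `task`/`dataset`/`metrics`/`source` entry per benchmark. For anyone who wants to consume these scores programmatically rather than scrape the rendered table, a minimal sketch follows, assuming the updated README.md is saved locally, PyYAML is installed, and the frontmatter contains no literal `---` of its own:

```python
import yaml  # pip install pyyaml

# Pull the YAML frontmatter out of the leading "---" fences of the card,
# then walk model-index and print every (dataset, metric, value) triple.
with open("README.md", encoding="utf-8") as f:
    frontmatter = f.read().split("---")[1]  # assumes no "---" inside the block

card = yaml.safe_load(frontmatter)
for result in card["model-index"][0]["results"]:
    dataset = result["dataset"]["name"]
    for metric in result["metrics"]:
        print(f"{dataset}: {metric['type']} = {metric.get('value')}")
```

Run against this card, it prints one line per reported metric, including the six entries this PR adds.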
 
@@ -163,3 +255,17 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
 |Winogrande (5-shot) |80.98|
 |GSM8k (5-shot) |65.13|
 
+
+# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
+Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_OpenBuddy__openbuddy-mixtral-7bx8-v18.1-32k)
+
+| Metric |Value|
+|-------------------|----:|
+|Avg. |22.12|
+|IFEval (0-Shot) |54.93|
+|BBH (3-Shot) |24.54|
+|MATH Lvl 5 (4-Shot)| 9.52|
+|GPQA (0-shot) | 7.27|
+|MuSR (0-shot) | 5.28|
+|MMLU-PRO (5-shot) |31.16|
+
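As a sanity check on the new table, the Avg. row is just the unweighted mean of the six benchmark scores; a quick sketch, with the values copied from the table above:

```python
# The six benchmark scores added in this PR; Avg. is their unweighted mean.
scores = {
    "IFEval (0-Shot)": 54.93,
    "BBH (3-Shot)": 24.54,
    "MATH Lvl 5 (4-Shot)": 9.52,
    "GPQA (0-shot)": 7.27,
    "MuSR (0-shot)": 5.28,
    "MMLU-PRO (5-shot)": 31.16,
}
print(f"Avg. = {sum(scores.values()) / len(scores):.2f}")  # -> Avg. = 22.12
```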