qnguyen3 committed on
Commit
ab3d151
1 Parent(s): 4c96187

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +19 -2
README.md CHANGED
@@ -77,8 +77,9 @@ Nous Benchmark:
77
  |---------------------------------------------------|------:|------:|---------:|-------:|------:|
78
  |[Master-Yi-9B](https://huggingface.co/qnguyen3/Master-Yi-9B)| 43.55| 71.48| 48.54| 41.43| 51.25|
79
 
80
- ```
81
  ### AGIEval
 
82
  | Task |Version| Metric |Value| |Stderr|
83
  |------------------------------|------:|--------|----:|---|-----:|
84
  |agieval_aqua_rat | 0|acc |35.83|± | 3.01|
@@ -99,8 +100,10 @@ Nous Benchmark:
99
  | | |acc_norm|47.73|± | 3.38|
100
 
101
  Average: 43.55%
 
102
 
103
  ### GPT4All
 
104
  | Task |Version| Metric |Value| |Stderr|
105
  |-------------|------:|--------|----:|---|-----:|
106
  |arc_challenge| 0|acc |54.95|± | 1.45|
@@ -117,16 +120,20 @@ Average: 43.55%
117
  |winogrande | 0|acc |72.61|± | 1.25|
118
 
119
  Average: 71.48%
 
120
 
121
  ### TruthfulQA
 
122
  | Task |Version|Metric|Value| |Stderr|
123
  |-------------|------:|------|----:|---|-----:|
124
  |truthfulqa_mc| 1|mc1 |33.05|± | 1.65|
125
  | | |mc2 |48.54|± | 1.54|
126
 
127
  Average: 48.54%
 
128
 
129
  ### Bigbench
 
130
  | Task |Version| Metric |Value| |Stderr|
131
  |------------------------------------------------|------:|---------------------|----:|---|-----:|
132
  |bigbench_causal_judgement | 0|multiple_choice_grade|54.74|± | 3.62|
@@ -160,8 +167,8 @@ OpenLLM Benchmark:
160
  |---------------------------------------------------|---:|--------:|----:|---------:|---------:|----:|------:|
161
  |[Master-Yi-9B](https://huggingface.co/qnguyen3/Master-Yi-9B)|61.6| 79.89|69.95| 48.59| 77.35|67.48| 67.48|
162
 
163
- ```
164
  ### ARC
 
165
  | Task |Version| Metric | Value | |Stderr|
166
  |-------------|------:|--------------------|-------------|---|------|
167
  |arc_challenge| 1|acc,none | 0.59| | |
@@ -171,8 +178,10 @@ OpenLLM Benchmark:
171
  | | |alias |arc_challenge| | |
172
 
173
  Average: 61.6%
 
174
 
175
  ### HellaSwag
 
176
  | Task |Version| Metric | Value | |Stderr|
177
  |---------|------:|--------------------|---------|---|------|
178
  |hellaswag| 1|acc,none | 0.61| | |
@@ -182,8 +191,10 @@ Average: 61.6%
182
  | | |alias |hellaswag| | |
183
 
184
  Average: 79.89%
 
185
 
186
  ### MMLU
 
187
  | Task |Version| Metric | Value | |Stderr|
188
  |----------------------------------------|-------|---------------|---------------------------------------|---|------|
189
  |mmlu |N/A |acc,none | 0.7| | |
@@ -374,8 +385,10 @@ Average: 79.89%
374
  | | |acc_stderr,none|0.03 | | |
375
 
376
  Average: 69.95%
 
377
 
378
  ### TruthfulQA
 
379
  | Task |Version| Metric | Value | |Stderr|
380
  |--------------|-------|-----------------------|-----------------|---|------|
381
  |truthfulqa |N/A |bleu_acc,none | 0.45| | |
@@ -438,8 +451,10 @@ Average: 69.95%
438
  | | |alias | - truthfulqa_mc2| | |
439
 
440
  Average: 48.59%
 
441
 
442
  ### Winogrande
 
443
  | Task |Version| Metric | Value | |Stderr|
444
  |----------|------:|---------------|----------|---|------|
445
  |winogrande| 1|acc,none | 0.77| | |
@@ -447,8 +462,10 @@ Average: 48.59%
447
  | | |alias |winogrande| | |
448
 
449
  Average: 77.35%
 
450
 
451
  ### GSM8K
 
452
  |Task |Version| Metric |Value| |Stderr|
453
  |-----|------:|-----------------------------------|-----|---|------|
454
  |gsm8k| 3|exact_match,strict-match | 0.67| | |
 
77
  |---------------------------------------------------|------:|------:|---------:|-------:|------:|
78
  |[Master-Yi-9B](https://huggingface.co/qnguyen3/Master-Yi-9B)| 43.55| 71.48| 48.54| 41.43| 51.25|
79
 
80
+
81
  ### AGIEval
82
+ ```
83
  | Task |Version| Metric |Value| |Stderr|
84
  |------------------------------|------:|--------|----:|---|-----:|
85
  |agieval_aqua_rat | 0|acc |35.83|± | 3.01|
 
100
  | | |acc_norm|47.73|± | 3.38|
101
 
102
  Average: 43.55%
103
+ ```
104
 
105
  ### GPT4All
106
+ ```
107
  | Task |Version| Metric |Value| |Stderr|
108
  |-------------|------:|--------|----:|---|-----:|
109
  |arc_challenge| 0|acc |54.95|± | 1.45|
 
120
  |winogrande | 0|acc |72.61|± | 1.25|
121
 
122
  Average: 71.48%
123
+ ```
124
 
125
  ### TruthfulQA
126
+ ```
127
  | Task |Version|Metric|Value| |Stderr|
128
  |-------------|------:|------|----:|---|-----:|
129
  |truthfulqa_mc| 1|mc1 |33.05|± | 1.65|
130
  | | |mc2 |48.54|± | 1.54|
131
 
132
  Average: 48.54%
133
+ ```
134
 
135
  ### Bigbench
136
+ ```
137
  | Task |Version| Metric |Value| |Stderr|
138
  |------------------------------------------------|------:|---------------------|----:|---|-----:|
139
  |bigbench_causal_judgement | 0|multiple_choice_grade|54.74|± | 3.62|
 
167
  |---------------------------------------------------|---:|--------:|----:|---------:|---------:|----:|------:|
168
  |[Master-Yi-9B](https://huggingface.co/qnguyen3/Master-Yi-9B)|61.6| 79.89|69.95| 48.59| 77.35|67.48| 67.48|
169
 
 
170
  ### ARC
171
+ ```
172
  | Task |Version| Metric | Value | |Stderr|
173
  |-------------|------:|--------------------|-------------|---|------|
174
  |arc_challenge| 1|acc,none | 0.59| | |
 
178
  | | |alias |arc_challenge| | |
179
 
180
  Average: 61.6%
181
+ ```
182
 
183
  ### HellaSwag
184
+ ```
185
  | Task |Version| Metric | Value | |Stderr|
186
  |---------|------:|--------------------|---------|---|------|
187
  |hellaswag| 1|acc,none | 0.61| | |
 
191
  | | |alias |hellaswag| | |
192
 
193
  Average: 79.89%
194
+ ```
195
 
196
  ### MMLU
197
+ ```
198
  | Task |Version| Metric | Value | |Stderr|
199
  |----------------------------------------|-------|---------------|---------------------------------------|---|------|
200
  |mmlu |N/A |acc,none | 0.7| | |
 
385
  | | |acc_stderr,none|0.03 | | |
386
 
387
  Average: 69.95%
388
+ ```
389
 
390
  ### TruthfulQA
391
+ ```
392
  | Task |Version| Metric | Value | |Stderr|
393
  |--------------|-------|-----------------------|-----------------|---|------|
394
  |truthfulqa |N/A |bleu_acc,none | 0.45| | |
 
451
  | | |alias | - truthfulqa_mc2| | |
452
 
453
  Average: 48.59%
454
+ ```
455
 
456
  ### Winogrande
457
+ ```
458
  | Task |Version| Metric | Value | |Stderr|
459
  |----------|------:|---------------|----------|---|------|
460
  |winogrande| 1|acc,none | 0.77| | |
 
462
  | | |alias |winogrande| | |
463
 
464
  Average: 77.35%
465
+ ```
466
 
467
  ### GSM8K
468
+ ```
469
  |Task |Version| Metric |Value| |Stderr|
470
  |-----|------:|-----------------------------------|-----|---|------|
471
  |gsm8k| 3|exact_match,strict-match | 0.67| | |