Update README.md
Browse files
README.md
CHANGED
@@ -12,7 +12,7 @@ model-index:
|
|
12 |
name: MMLU
|
13 |
metrics:
|
14 |
- type: 5-shot
|
15 |
-
value:
|
16 |
verified: false
|
17 |
- task:
|
18 |
type: text-generation
|
@@ -21,7 +21,7 @@ model-index:
|
|
21 |
name: GPQA
|
22 |
metrics:
|
23 |
- type: 0-shot
|
24 |
-
value: 29.
|
25 |
verified: false
|
26 |
- task:
|
27 |
type: text-generation
|
@@ -30,7 +30,7 @@ model-index:
|
|
30 |
name: GSM-8K
|
31 |
metrics:
|
32 |
- type: 8-shot, CoT
|
33 |
-
value:
|
34 |
verified: false
|
35 |
- task:
|
36 |
type: text-generation
|
@@ -39,7 +39,7 @@ model-index:
|
|
39 |
name: MATH
|
40 |
metrics:
|
41 |
- type: 4-shot, CoT
|
42 |
-
value:
|
43 |
verified: false
|
44 |
- task:
|
45 |
type: text-generation
|
|
|
12 |
name: MMLU
|
13 |
metrics:
|
14 |
- type: 5-shot
|
15 |
+
value: 67.87
|
16 |
verified: false
|
17 |
- task:
|
18 |
type: text-generation
|
|
|
21 |
name: GPQA
|
22 |
metrics:
|
23 |
- type: 0-shot
|
24 |
+
value: 29.69
|
25 |
verified: false
|
26 |
- task:
|
27 |
type: text-generation
|
|
|
30 |
name: GSM-8K
|
31 |
metrics:
|
32 |
- type: 8-shot, CoT
|
33 |
+
value: 79.45
|
34 |
verified: false
|
35 |
- task:
|
36 |
type: text-generation
|
|
|
39 |
name: MATH
|
40 |
metrics:
|
41 |
- type: 4-shot, CoT
|
42 |
+
value: 30.80
|
43 |
verified: false
|
44 |
- task:
|
45 |
type: text-generation
|