Spaces:
Running
Running
Andrea Seveso
committed on
Commit
•
48d0e61
1
Parent(s):
e6aadde
Add claude-3.5-sonnet
Browse files
- src/macro_area.csv +1 -0
- src/question_format.csv +1 -0
src/macro_area.csv
CHANGED
@@ -6,6 +6,7 @@ Minerva-3B-base-v1.0,4.6,3.9,9.1,28.6,3.4,4.2,0.0,5.3,0.0
|
|
6 |
claude-3-haiku,78.7,86.0,75.8,71.4,65.5,62.5,0.0,57.9,83.3
|
7 |
claude-3-opus,91.7,91.6,78.8,100.0,82.8,75.0,50.0,89.5,83.3
|
8 |
claude-3-sonnet,87.0,90.5,75.8,100.0,62.1,75.0,0.0,52.6,100.0
|
|
|
9 |
command-r-plus,74.1,80.4,81.8,71.4,65.5,66.7,0.0,57.9,83.3
|
10 |
gemini-flash-1.5,83.3,85.5,81.8,85.7,62.1,83.3,25.0,63.2,66.7
|
11 |
gemini-pro,78.7,82.1,81.8,71.4,51.7,70.8,0.0,68.4,66.7
|
|
|
6 |
claude-3-haiku,78.7,86.0,75.8,71.4,65.5,62.5,0.0,57.9,83.3
|
7 |
claude-3-opus,91.7,91.6,78.8,100.0,82.8,75.0,50.0,89.5,83.3
|
8 |
claude-3-sonnet,87.0,90.5,75.8,100.0,62.1,75.0,0.0,52.6,100.0
|
9 |
+
claude-3.5-sonnet:beta,92.6,95.0,84.8,100.0,93.1,87.5,25.0,94.7,83.3
|
10 |
command-r-plus,74.1,80.4,81.8,71.4,65.5,66.7,0.0,57.9,83.3
|
11 |
gemini-flash-1.5,83.3,85.5,81.8,85.7,62.1,83.3,25.0,63.2,66.7
|
12 |
gemini-pro,78.7,82.1,81.8,71.4,51.7,70.8,0.0,68.4,66.7
|
src/question_format.csv
CHANGED
@@ -6,6 +6,7 @@ Minerva-3B-base-v1.0,0.0,0.0,0.0,13.3,0.0,0.0,0.0,0.0,0.0,0.0,8.6,0.0,0.0,0.0,6.
|
|
6 |
claude-3-haiku,100.0,50.0,0.0,91.7,28.6,0.0,33.3,84.5,57.1,77.8,85.2,100.0,75.0,50.0,75.0,46.2,64.3,71.4,12.5
|
7 |
claude-3-opus,100.0,100.0,100.0,98.3,71.4,100.0,33.3,93.0,85.7,88.9,93.8,0.0,100.0,50.0,85.4,61.5,71.4,90.5,25.0
|
8 |
claude-3-sonnet,100.0,100.0,100.0,96.7,85.7,100.0,50.0,88.7,57.1,66.7,87.6,0.0,75.0,50.0,81.2,53.8,64.3,78.6,12.5
|
|
|
9 |
command-r-plus,90.6,0.0,100.0,88.3,14.3,0.0,50.0,80.3,57.1,66.7,85.2,0.0,100.0,50.0,79.2,46.2,57.1,61.9,12.5
|
10 |
gemini-flash-1.5,90.6,0.0,0.0,86.7,71.4,100.0,33.3,93.0,85.7,88.9,88.9,0.0,100.0,50.0,81.2,38.5,50.0,81.0,0.0
|
11 |
gemini-pro,96.9,0.0,0.0,90.0,14.3,0.0,16.7,80.3,71.4,66.7,88.9,0.0,100.0,0.0,79.2,46.2,64.3,69.0,0.0
|
|
|
6 |
claude-3-haiku,100.0,50.0,0.0,91.7,28.6,0.0,33.3,84.5,57.1,77.8,85.2,100.0,75.0,50.0,75.0,46.2,64.3,71.4,12.5
|
7 |
claude-3-opus,100.0,100.0,100.0,98.3,71.4,100.0,33.3,93.0,85.7,88.9,93.8,0.0,100.0,50.0,85.4,61.5,71.4,90.5,25.0
|
8 |
claude-3-sonnet,100.0,100.0,100.0,96.7,85.7,100.0,50.0,88.7,57.1,66.7,87.6,0.0,75.0,50.0,81.2,53.8,64.3,78.6,12.5
|
9 |
+
claude-3.5-sonnet:beta,100.0,100.0,100.0,100.0,85.7,100.0,50.0,97.2,100.0,88.9,95.1,100.0,100.0,50.0,93.8,69.2,50.0,92.9,62.5
|
10 |
command-r-plus,90.6,0.0,100.0,88.3,14.3,0.0,50.0,80.3,57.1,66.7,85.2,0.0,100.0,50.0,79.2,46.2,57.1,61.9,12.5
|
11 |
gemini-flash-1.5,90.6,0.0,0.0,86.7,71.4,100.0,33.3,93.0,85.7,88.9,88.9,0.0,100.0,50.0,81.2,38.5,50.0,81.0,0.0
|
12 |
gemini-pro,96.9,0.0,0.0,90.0,14.3,0.0,16.7,80.3,71.4,66.7,88.9,0.0,100.0,0.0,79.2,46.2,64.3,69.0,0.0
|