DanielePoterti committed on
Commit
89b62a5
·
verified ·
1 Parent(s): 4c295e7

upload results

Browse files
Files changed (2) hide show
  1. src/macro_area.csv +1 -0
  2. src/question_format.csv +1 -0
src/macro_area.csv CHANGED
@@ -12,6 +12,7 @@ command-r-plus,74.1,80.4,81.8,71.4,65.5,66.7,0.0,57.9,83.3
12
  gemini-flash-1.5,83.3,85.5,81.8,85.7,62.1,83.3,25.0,63.2,66.7
13
  gemini-pro,78.7,82.1,81.8,71.4,51.7,70.8,0.0,68.4,66.7
14
  gemini-pro-1.5,90.7,87.7,84.8,57.1,55.2,58.3,25.0,63.2,33.3
 
15
  gpt-3.5-turbo-0125,61.1,64.8,63.6,42.9,55.2,58.3,0.0,47.4,83.3
16
  gpt-4-turbo,77.8,82.1,75.8,71.4,82.8,75.0,50.0,73.7,100.0
17
  gpt-4o,64.8,69.8,51.5,100.0,69.0,87.5,0.0,89.5,100.0
 
12
  gemini-flash-1.5,83.3,85.5,81.8,85.7,62.1,83.3,25.0,63.2,66.7
13
  gemini-pro,78.7,82.1,81.8,71.4,51.7,70.8,0.0,68.4,66.7
14
  gemini-pro-1.5,90.7,87.7,84.8,57.1,55.2,58.3,25.0,63.2,33.3
15
+ gemma-2-9b-it,75.9,82.7,66.7,71.4,51.7,58.3,0.0,57.9,83.3
16
  gpt-3.5-turbo-0125,61.1,64.8,63.6,42.9,55.2,58.3,0.0,47.4,83.3
17
  gpt-4-turbo,77.8,82.1,75.8,71.4,82.8,75.0,50.0,73.7,100.0
18
  gpt-4o,64.8,69.8,51.5,100.0,69.0,87.5,0.0,89.5,100.0
src/question_format.csv CHANGED
@@ -12,6 +12,7 @@ command-r-plus,90.6,0.0,100.0,88.3,14.3,0.0,50.0,80.3,57.1,66.7,85.2,0.0,100.0,5
12
  gemini-flash-1.5,90.6,0.0,0.0,86.7,71.4,100.0,33.3,93.0,85.7,88.9,88.9,0.0,100.0,50.0,81.2,38.5,50.0,81.0,0.0
13
  gemini-pro,96.9,0.0,0.0,90.0,14.3,0.0,16.7,80.3,71.4,66.7,88.9,0.0,100.0,0.0,79.2,46.2,64.3,69.0,0.0
14
  gemini-pro-1.5,96.9,0.0,0.0,90.0,42.9,100.0,33.3,87.3,42.9,77.8,87.6,0.0,100.0,50.0,79.2,46.2,85.7,85.7,12.5
 
15
  gpt-3.5-turbo-0125,84.4,0.0,0.0,73.3,14.3,0.0,50.0,53.5,42.9,44.4,67.9,0.0,75.0,50.0,68.8,46.2,71.4,52.4,0.0
16
  gpt-4-turbo,100.0,100.0,100.0,91.7,71.4,100.0,66.7,63.4,100.0,88.9,92.6,0.0,100.0,50.0,87.5,61.5,50.0,64.3,12.5
17
  gpt-4o,78.1,100.0,100.0,83.3,71.4,100.0,66.7,66.2,85.7,77.8,80.2,0.0,100.0,0.0,68.8,38.5,78.6,38.1,12.5
 
12
  gemini-flash-1.5,90.6,0.0,0.0,86.7,71.4,100.0,33.3,93.0,85.7,88.9,88.9,0.0,100.0,50.0,81.2,38.5,50.0,81.0,0.0
13
  gemini-pro,96.9,0.0,0.0,90.0,14.3,0.0,16.7,80.3,71.4,66.7,88.9,0.0,100.0,0.0,79.2,46.2,64.3,69.0,0.0
14
  gemini-pro-1.5,96.9,0.0,0.0,90.0,42.9,100.0,33.3,87.3,42.9,77.8,87.6,0.0,100.0,50.0,79.2,46.2,85.7,85.7,12.5
15
+ gemma-2-9b-it,96.9,50.0,0.0,88.3,14.3,0.0,33.3,83.1,42.9,66.7,77.8,0.0,50.0,0.0,79.2,61.5,57.1,61.9,12.5
16
  gpt-3.5-turbo-0125,84.4,0.0,0.0,73.3,14.3,0.0,50.0,53.5,42.9,44.4,67.9,0.0,75.0,50.0,68.8,46.2,71.4,52.4,0.0
17
  gpt-4-turbo,100.0,100.0,100.0,91.7,71.4,100.0,66.7,63.4,100.0,88.9,92.6,0.0,100.0,50.0,87.5,61.5,50.0,64.3,12.5
18
  gpt-4o,78.1,100.0,100.0,83.3,71.4,100.0,66.7,66.2,85.7,77.8,80.2,0.0,100.0,0.0,68.8,38.5,78.6,38.1,12.5