DanielePoterti committed on
Commit
cd0e904
·
verified ·
1 Parent(s): 565a777

update models

Browse files
Files changed (2) hide show
  1. src/macro_area.csv +2 -0
  2. src/question_format.csv +2 -0
src/macro_area.csv CHANGED
@@ -3,6 +3,7 @@ MacroAspetto,Localizzare e individuare informazioni all’interno del testo,"Ric
3
  Model,,,,,,,,,
4
  LLaMAntino-3-ANITA-8B-Inst-DPO-ITA,60.2,63.1,78.8,28.6,37.9,16.7,0.0,26.3,50.0
5
  Minerva-3B-base-v1.0,4.6,3.9,9.1,28.6,3.4,4.2,0.0,5.3,0.0
 
6
  claude-3-haiku,78.7,86.0,75.8,71.4,65.5,62.5,0.0,57.9,83.3
7
  claude-3-opus,91.7,91.6,78.8,100.0,82.8,75.0,50.0,89.5,83.3
8
  claude-3-sonnet,87.0,90.5,75.8,100.0,62.1,75.0,0.0,52.6,100.0
@@ -16,6 +17,7 @@ gpt-4-turbo,77.8,82.1,75.8,71.4,82.8,75.0,50.0,73.7,100.0
16
  gpt-4o,64.8,69.8,51.5,100.0,69.0,87.5,0.0,89.5,100.0
17
  llama-3-70b-instruct,83.3,85.5,75.8,71.4,55.2,33.3,0.0,47.4,50.0
18
  llama-3-8b-instruct,48.2,53.6,63.6,14.3,34.5,29.2,0.0,31.6,50.0
 
19
  mistral-7b-instruct:nitro,51.8,59.2,51.5,28.6,37.9,29.2,0.0,31.6,33.3
20
  mixtral-8x22b-instruct,74.1,76.0,72.7,28.6,44.8,66.7,0.0,31.6,66.7
21
  mixtral-8x7b-instruct,74.1,77.1,69.7,42.9,37.9,50.0,0.0,52.6,50.0
 
3
  Model,,,,,,,,,
4
  LLaMAntino-3-ANITA-8B-Inst-DPO-ITA,60.2,63.1,78.8,28.6,37.9,16.7,0.0,26.3,50.0
5
  Minerva-3B-base-v1.0,4.6,3.9,9.1,28.6,3.4,4.2,0.0,5.3,0.0
6
+ Minerva_3B_Ties_1.0,37.0,20.7,36.4,14.3,44.8,41.7,0.0,31.6,66.7
7
  claude-3-haiku,78.7,86.0,75.8,71.4,65.5,62.5,0.0,57.9,83.3
8
  claude-3-opus,91.7,91.6,78.8,100.0,82.8,75.0,50.0,89.5,83.3
9
  claude-3-sonnet,87.0,90.5,75.8,100.0,62.1,75.0,0.0,52.6,100.0
 
17
  gpt-4o,64.8,69.8,51.5,100.0,69.0,87.5,0.0,89.5,100.0
18
  llama-3-70b-instruct,83.3,85.5,75.8,71.4,55.2,33.3,0.0,47.4,50.0
19
  llama-3-8b-instruct,48.2,53.6,63.6,14.3,34.5,29.2,0.0,31.6,50.0
20
+ maestrale-chat-v0.4-beta,62.0,61.4,60.6,42.9,44.8,33.3,0.0,15.8,50.0
21
  mistral-7b-instruct:nitro,51.8,59.2,51.5,28.6,37.9,29.2,0.0,31.6,33.3
22
  mixtral-8x22b-instruct,74.1,76.0,72.7,28.6,44.8,66.7,0.0,31.6,66.7
23
  mixtral-8x7b-instruct,74.1,77.1,69.7,42.9,37.9,50.0,0.0,52.6,50.0
src/question_format.csv CHANGED
@@ -3,6 +3,7 @@ Type,MC,MCC,CL,MC,MCC,RB,RU,MC,MCC,RU,MC,MCC,RU,CL,MC,MCC,RU,MC,MCC
3
  Model,,,,,,,,,,,,,,,,,,,
4
  LLaMAntino-3-ANITA-8B-Inst-DPO-ITA,71.9,0.0,0.0,70.0,14.3,0.0,16.7,67.6,42.9,22.2,55.6,100.0,50.0,0.0,64.6,23.1,57.1,45.2,0.0
5
  Minerva-3B-base-v1.0,0.0,0.0,0.0,13.3,0.0,0.0,0.0,0.0,0.0,0.0,8.6,0.0,0.0,0.0,6.2,0.0,0.0,4.8,0.0
 
6
  claude-3-haiku,100.0,50.0,0.0,91.7,28.6,0.0,33.3,84.5,57.1,77.8,85.2,100.0,75.0,50.0,75.0,46.2,64.3,71.4,12.5
7
  claude-3-opus,100.0,100.0,100.0,98.3,71.4,100.0,33.3,93.0,85.7,88.9,93.8,0.0,100.0,50.0,85.4,61.5,71.4,90.5,25.0
8
  claude-3-sonnet,100.0,100.0,100.0,96.7,85.7,100.0,50.0,88.7,57.1,66.7,87.6,0.0,75.0,50.0,81.2,53.8,64.3,78.6,12.5
@@ -16,6 +17,7 @@ gpt-4-turbo,100.0,100.0,100.0,91.7,71.4,100.0,66.7,63.4,100.0,88.9,92.6,0.0,100.
16
  gpt-4o,78.1,100.0,100.0,83.3,71.4,100.0,66.7,66.2,85.7,77.8,80.2,0.0,100.0,0.0,68.8,38.5,78.6,38.1,12.5
17
  llama-3-70b-instruct,96.9,0.0,0.0,90.0,14.3,0.0,33.3,87.3,71.4,66.7,79.0,0.0,75.0,0.0,68.8,46.2,71.4,76.2,0.0
18
  llama-3-8b-instruct,65.6,0.0,0.0,66.7,0.0,0.0,16.7,57.8,28.6,11.1,42.0,0.0,0.0,0.0,54.2,15.4,28.6,57.1,0.0
 
19
  mistral-7b-instruct:nitro,71.9,0.0,0.0,66.7,0.0,0.0,16.7,59.2,14.3,33.3,50.6,0.0,25.0,0.0,50.0,23.1,28.6,57.1,0.0
20
  mixtral-8x22b-instruct,81.2,0.0,0.0,73.3,42.9,0.0,50.0,78.9,42.9,44.4,74.1,0.0,100.0,0.0,72.9,38.5,57.1,69.0,12.5
21
  mixtral-8x7b-instruct,96.9,0.0,0.0,76.7,14.3,0.0,16.7,80.3,57.1,55.6,71.6,0.0,75.0,0.0,68.8,30.8,57.1,69.0,0.0
 
3
  Model,,,,,,,,,,,,,,,,,,,
4
  LLaMAntino-3-ANITA-8B-Inst-DPO-ITA,71.9,0.0,0.0,70.0,14.3,0.0,16.7,67.6,42.9,22.2,55.6,100.0,50.0,0.0,64.6,23.1,57.1,45.2,0.0
5
  Minerva-3B-base-v1.0,0.0,0.0,0.0,13.3,0.0,0.0,0.0,0.0,0.0,0.0,8.6,0.0,0.0,0.0,6.2,0.0,0.0,4.8,0.0
6
+ Minerva_3B_Ties_1.0,6.2,0.0,0.0,28.3,0.0,0.0,0.0,32.4,28.6,11.1,39.5,100.0,0.0,0.0,47.9,7.7,7.1,42.9,25.0
7
  claude-3-haiku,100.0,50.0,0.0,91.7,28.6,0.0,33.3,84.5,57.1,77.8,85.2,100.0,75.0,50.0,75.0,46.2,64.3,71.4,12.5
8
  claude-3-opus,100.0,100.0,100.0,98.3,71.4,100.0,33.3,93.0,85.7,88.9,93.8,0.0,100.0,50.0,85.4,61.5,71.4,90.5,25.0
9
  claude-3-sonnet,100.0,100.0,100.0,96.7,85.7,100.0,50.0,88.7,57.1,66.7,87.6,0.0,75.0,50.0,81.2,53.8,64.3,78.6,12.5
 
17
  gpt-4o,78.1,100.0,100.0,83.3,71.4,100.0,66.7,66.2,85.7,77.8,80.2,0.0,100.0,0.0,68.8,38.5,78.6,38.1,12.5
18
  llama-3-70b-instruct,96.9,0.0,0.0,90.0,14.3,0.0,33.3,87.3,71.4,66.7,79.0,0.0,75.0,0.0,68.8,46.2,71.4,76.2,0.0
19
  llama-3-8b-instruct,65.6,0.0,0.0,66.7,0.0,0.0,16.7,57.8,28.6,11.1,42.0,0.0,0.0,0.0,54.2,15.4,28.6,57.1,0.0
20
+ maestrale-chat-v0.4-beta,65.6,0.0,0.0,66.7,14.3,0.0,0.0,62.0,0.0,33.3,60.5,0.0,25.0,0.0,62.5,23.1,35.7,71.4,0.0
21
  mistral-7b-instruct:nitro,71.9,0.0,0.0,66.7,0.0,0.0,16.7,59.2,14.3,33.3,50.6,0.0,25.0,0.0,50.0,23.1,28.6,57.1,0.0
22
  mixtral-8x22b-instruct,81.2,0.0,0.0,73.3,42.9,0.0,50.0,78.9,42.9,44.4,74.1,0.0,100.0,0.0,72.9,38.5,57.1,69.0,12.5
23
  mixtral-8x7b-instruct,96.9,0.0,0.0,76.7,14.3,0.0,16.7,80.3,57.1,55.6,71.6,0.0,75.0,0.0,68.8,30.8,57.1,69.0,0.0