[
    {
        "model": "sabia-2-small",
        "name": "Sabiá-2 Small",
        "link": "https://www.maritaca.ai/",
        "date": "2024-04-12",
        "status": "full",
        "main_language": "Portuguese",
        "result_metrics": {
            "enem_challenge": 0.7172848145556333,
            "bluex": 0.5549374130737135,
            "oab_exams": 0.6364464692482916,
            "assin2_sts": 0.7053302344881672,
            "assin2_rte": 0.9121728362223306,
            "faquad_nli": 0.7575848453041435,
            "hatebr_offensive": 0.5025338637870607,
            "portuguese_hate_speech": 0.4650217578860529,
            "tweetsentbr": 0.533977453070735
        },
        "result_metrics_average": 0.6428099652929031,
        "result_metrics_npm": 0.43960062672137007
    },
    {
        "model": "sabia-2-medium",
        "name": "Sabiá-2 Medium",
        "link": "https://www.maritaca.ai/",
        "date": "2024-04-13",
        "status": "full",
        "main_language": "Portuguese",
        "result_metrics": {
            "enem_challenge": 0.8180545836249126,
            "bluex": 0.717663421418637,
            "oab_exams": 0.7321184510250569,
            "assin2_sts": 0.7804108376537757,
            "assin2_rte": 0.923459363368553,
            "faquad_nli": 0.7657657657657658,
            "hatebr_offensive": 0.8349989882997386,
            "portuguese_hate_speech": 0.7379326358571694,
            "tweetsentbr": 0.7269533040381798
        },
        "result_metrics_average": 0.7819285945613098,
        "result_metrics_npm": 0.6676121786922709
    },
    {
        "model": "gpt-3.5-turbo-0125",
        "name": "GPT-3.5 Turbo (0125)",
        "link": "https://www.openai.com/",
        "date": "2024-03-08",
        "status": "full",
        "main_language": "English",
        "result_metrics": {
            "enem_challenge": 0.7214835549335199,
            "bluex": 0.6244784422809457,
            "oab_exams": 0.5430523917995445,
            "assin2_sts": 0.7378460201077941,
            "assin2_rte": 0.8823038414050672,
            "faquad_nli": 0.746353108609074,
            "hatebr_offensive": 0.8056205941193919,
            "portuguese_hate_speech": 0.7363692688971499,
            "tweetsentbr": 0.7028981330613626
        },
        "result_metrics_average": 0.7222672616904278,
        "result_metrics_npm": 0.5841504766165372
    },
    {
        "model": "claude-3-haiku-20240307",
        "name": "Claude-3 Haiku (20240307)",
        "link": "https://www.claude.ai/",
        "date": "2024-04-13",
        "status": "full",
        "main_language": "English",
        "result_metrics": {
            "enem_challenge": 0.7718684394681595,
            "bluex": 0.6662030598052852,
            "oab_exams": 0.626879271070615,
            "assin2_sts": 0.7892124744168747,
            "assin2_rte": 0.9184462138121732,
            "faquad_nli": 0.6340996599941455,
            "hatebr_offensive": 0.8023698759439051,
            "portuguese_hate_speech": 0.7342166269560177,
            "tweetsentbr": 0.5477486799750156
        },
        "result_metrics_average": 0.7212271446046878,
        "result_metrics_npm": 0.5735261536314672
    },
    {
        "model": "gemini-1.0-pro",
        "name": "Gemini 1.0 Pro",
        "link": "https://ai.google.dev/",
        "date": "2024-03-08",
        "status": "full",
        "main_language": "English",
        "result_metrics": {
            "enem_challenge": 0.7130860741777467,
            "bluex": 0.5869262865090403,
            "oab_exams": 0.4988610478359909,
            "assin2_sts": 0.7058831239763663,
            "assin2_rte": 0.8945993304651698,
            "faquad_nli": 0.7070913567220611,
            "hatebr_offensive": 0.8086330094493972,
            "portuguese_hate_speech": 0.699119105113102,
            "tweetsentbr": 0.6803240476660983
        },
        "result_metrics_average": 0.6993914868794414,
        "result_metrics_npm": 0.551208000273598
    },
    {
        "model": "gemini-1.5-pro-preview-0409",
        "name": "Gemini 1.5 Pro Preview (0409)",
        "link": "https://cloud.google.com/vertex-ai",
        "date": "2024-04-15",
        "status": "full",
        "main_language": "English",
        "result_metrics": {
            "enem_challenge": 0.8509447165850245,
            "bluex": 0.7719054242002782,
            "oab_exams": 0.6888382687927107,
            "assin2_sts": 0.8159702278408203,
            "assin2_rte": 0.9328989988467518,
            "faquad_nli": 0.7290756302521009,
            "hatebr_offensive": 0.8697698647467024,
            "portuguese_hate_speech": 0.7539414414414414,
            "tweetsentbr": 0.772785080895884
        },
        "result_metrics_average": 0.7984588504001905,
        "result_metrics_npm": 0.6908188311933006
    },
    {
        "model": "deepseek-v2-chat",
        "name": "DeepSeek-V2 Chat (API)",
        "link": "https://www.deepseek.com/",
        "date": "2024-05-18",
        "status": "full",
        "main_language": "English",
        "result_metrics": {
            "enem_challenge": 0.7844646606018194,
            "bluex": 0.6954102920723226,
            "oab_exams": 0.564009111617312,
            "assin2_sts": 0.8533174657651231,
            "assin2_rte": 0.9440170304568147,
            "faquad_nli": 0.7995469048381548,
            "hatebr_offensive": 0.8842986491071644,
            "portuguese_hate_speech": 0.7271736342651962,
            "tweetsentbr": 0.6835304759163984
        },
        "result_metrics_average": 0.7706409138489229,
        "result_metrics_npm": 0.655901521190756
    },
    {
        "model": "gemini-1.5-flash-preview-0514",
        "name": "Gemini 1.5 Flash Preview (0514)",
        "link": "https://cloud.google.com/vertex-ai",
        "date": "2024-05-18",
        "status": "full",
        "main_language": "English",
        "result_metrics": {
            "enem_challenge": 0.8264520643806857,
            "bluex": 0.7482614742698191,
            "oab_exams": 0.6419134396355353,
            "assin2_sts": 0.841655158151231,
            "assin2_rte": 0.9362097477374545,
            "faquad_nli": 0.8092185592185592,
            "hatebr_offensive": 0.9099110141445836,
            "portuguese_hate_speech": 0.6875904275305673,
            "tweetsentbr": 0.7219800292667018
        },
        "result_metrics_average": 0.7914657682594597,
        "result_metrics_npm": 0.6834036936130392
    }
]