[ { "model": "sabia-2-small", "name": "Sabiá-2 Small", "link": "https://www.maritaca.ai/", "date": "2024-04-12", "status": "full", "main_language": "Portuguese", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.7172848145556333, "bluex": 0.5549374130737135, "oab_exams": 0.6364464692482916, "assin2_sts": 0.7053302344881672, "assin2_rte": 0.9121728362223306, "faquad_nli": 0.7575848453041435, "hatebr_offensive": 0.753800795680591, "portuguese_hate_speech": 0.6975326368290793, "tweetsentbr": 0.7119699374276466 }, "result_metrics_average": 0.7163399980921773, "result_metrics_npm": 0.5744541501392351 }, { "model": "sabia-2-medium", "name": "Sabiá-2 Medium", "link": "https://www.maritaca.ai/", "date": "2024-04-13", "status": "full", "main_language": "Portuguese", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.8180545836249126, "bluex": 0.717663421418637, "oab_exams": 0.7321184510250569, "assin2_sts": 0.7804108376537757, "assin2_rte": 0.923459363368553, "faquad_nli": 0.7657657657657658, "hatebr_offensive": 0.8349989882997386, "portuguese_hate_speech": 0.7379326358571694, "tweetsentbr": 0.7269533040381798 }, "result_metrics_average": 0.7819285945613098, "result_metrics_npm": 0.6676121786922709 }, { "model": "gpt-3.5-turbo-0125", "name": "GPT-3.5 Turbo (0125)", "link": "https://www.openai.com/", "date": "2024-03-08", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.7214835549335199, "bluex": 0.6244784422809457, "oab_exams": 0.5430523917995445, "assin2_sts": 0.7378460201077941, "assin2_rte": 0.8823038414050672, "faquad_nli": 0.746353108609074, "hatebr_offensive": 0.8056205941193919, "portuguese_hate_speech": 0.7363692688971499, "tweetsentbr": 0.7028981330613626 }, "result_metrics_average": 0.7222672616904278, "result_metrics_npm": 0.5841504766165372 }, { "model": "claude-3-haiku-20240307", "name": "Claude-3 Haiku (20240307)", "link": "https://www.claude.ai/", "date": "2024-04-13", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.7718684394681595, "bluex": 0.6662030598052852, "oab_exams": 0.626879271070615, "assin2_sts": 0.7892124744168747, "assin2_rte": 0.9184462138121732, "faquad_nli": 0.6340996599941455, "hatebr_offensive": 0.8023698759439051, "portuguese_hate_speech": 0.7342166269560177, "tweetsentbr": 0.7303315733000207 }, "result_metrics_average": 0.7415141327519107, "result_metrics_npm": 0.6037151240886439 }, { "model": "gemini-1.0-pro", "name": "Gemini 1.0 Pro", "link": "https://ai.google.dev/", "date": "2024-03-08", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.7130860741777467, "bluex": 0.5869262865090403, "oab_exams": 0.4988610478359909, "assin2_sts": 0.7058831239763663, "assin2_rte": 0.8945993304651698, "faquad_nli": 0.7070913567220611, "hatebr_offensive": 0.8086330094493972, "portuguese_hate_speech": 0.699119105113102, "tweetsentbr": 0.6803240476660983 }, "result_metrics_average": 0.6993914868794414, "result_metrics_npm": 0.551208000273598 }, { "model": "gemini-1.5-pro-preview-0409", "name": "Gemini 1.5 Pro Preview (0409)", "link": "https://cloud.google.com/vertex-ai", "date": "2024-04-15", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.8509447165850245, "bluex": 0.7719054242002782, "oab_exams": 0.6888382687927107, "assin2_sts": 0.8159702278408203, "assin2_rte": 0.4651063829787234, 
"faquad_nli": 0.4114285714285714, "hatebr_offensive": 0.4422336328626444, "portuguese_hate_speech": 0.41441441441441434, "tweetsentbr": 0.7725066133902373 }, "result_metrics_average": 0.6259275836103805, "result_metrics_npm": 0.3545226679161642 }, { "model": "deepseek-v2-chat", "name": "DeepSeek-V2 Chat (API)", "link": "https://www.deepseek.com/", "date": "2024-05-18", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.7844646606018194, "bluex": 0.6954102920723226, "oab_exams": 0.564009111617312, "assin2_sts": 0.8533174657651231, "assin2_rte": 0.9440170304568147, "faquad_nli": 0.7995469048381548, "hatebr_offensive": 0.8842986491071644, "portuguese_hate_speech": 0.7271736342651962, "tweetsentbr": 0.6835304759163984 }, "result_metrics_average": 0.7706409138489229, "result_metrics_npm": 0.655901521190756 }, { "model": "gemini-1.5-flash-preview-0514", "name": "Gemini 1.5 Flash Preview (0514)", "link": "https://cloud.google.com/vertex-ai", "date": "2024-05-18", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.8264520643806857, "bluex": 0.7482614742698191, "oab_exams": 0.6419134396355353, "assin2_sts": 0.841655158151231, "assin2_rte": 0.9362097477374545, "faquad_nli": 0.8092185592185592, "hatebr_offensive": 0.9099110141445836, "portuguese_hate_speech": 0.6875904275305673, "tweetsentbr": 0.7219800292667018 }, "result_metrics_average": 0.7914657682594597, "result_metrics_npm": 0.6834036936130392 }, { "model": "gemini-1.5-flash", "name": "Gemini 1.5 Flash", "link": "https://cloud.google.com/vertex-ai", "date": "2024-08-09", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.8306508047585724, "bluex": 0.7579972183588317, "oab_exams": 0.6446469248291572, "assin2_sts": 0.838806085610371, "assin2_rte": 0.9366169973822607, "faquad_nli": 0.7963910785668922, "hatebr_offensive": 0.9092078461170015, "portuguese_hate_speech": 0.6932563987219857, "tweetsentbr": 0.7312948963367732 }, "result_metrics_average": 0.7932075834090939, "result_metrics_npm": 0.6855338135928848 }, { "model": "gpt-4o-mini-2024-07-18", "name": "GPT 4o Mini (2024-07-18)", "link": "https://www.openai.com/", "date": "2024-07-25", "status": "full", "main_language": "English", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.7669699090272918, "bluex": 0.6842837273991655, "oab_exams": 0.6013667425968109, "assin2_sts": 0.7259038954527597, "assin2_rte": 0.942809846745341, "faquad_nli": 0.819807735300693, "hatebr_offensive": 0.8682357029532165, "portuguese_hate_speech": 0.7501413502853012, "tweetsentbr": 0.7509303825869922 }, "result_metrics_average": 0.7678276991497301, "result_metrics_npm": 0.6595966999910003 }, { "model": "nemotron-4-340b-instruct", "name": "nvidia/Nemotron-4-340B-Instruct (Nvidia API)", "link": "https://build.nvidia.com/nvidia/nemotron-4-340b-instruct", "date": "2024-06-30", "status": "full", "main_language": "English", "model_type": "chat", "params": 340.0, "result_metrics": { "enem_challenge": 0.6648005598320503, "bluex": 0.6578581363004172, "oab_exams": 0.7020501138952164, "assin2_sts": 0.7857731021403329, "assin2_rte": 0.9489354458928496, "faquad_nli": 0.8194444444444444, "hatebr_offensive": 0.8641580001234928, "portuguese_hate_speech": 0.7761835184102864, "tweetsentbr": 0.780880021326841 }, "result_metrics_average": 0.7777870380406591, "result_metrics_npm": 0.6740728488043128 }, { "model": 
"llama_405b_instruct", "name": "meta-llama/Meta-Llama-3.1-405B-Instruct (Vertex AI)", "link": "https://cloud.google.com/vertex-ai", "date": "2024-08-20", "status": "full", "main_language": "English", "model_type": "chat", "params": 406.0, "result_metrics": { "enem_challenge": 0.8523442967109867, "bluex": 0.8011126564673157, "oab_exams": 0.7640091116173121, "assin2_sts": 0.7888441732870783, "assin2_rte": 0.9476445477916471, "faquad_nli": 0.825063276593557, "hatebr_offensive": 0.9073940659389119, "portuguese_hate_speech": 0.7191480935512969, "tweetsentbr": 0.7821434639106575 }, "result_metrics_average": 0.8208559650965292, "result_metrics_npm": 0.7286932366792048 }, { "model": "sabia-3", "name": "Sabiá-3", "link": "https://www.maritaca.ai/", "date": "2024-08-20", "status": "full", "main_language": "Portuguese", "model_type": "proprietary", "result_metrics": { "enem_challenge": 0.8789363191042687, "bluex": 0.7899860917941586, "oab_exams": 0.8391799544419134, "assin2_sts": 0.8253863689009022, "assin2_rte": 0.9477034821619312, "faquad_nli": 0.8243848812618203, "hatebr_offensive": 0.8278737774590023, "portuguese_hate_speech": 0.7241071428571428, "tweetsentbr": 0.7510613086648664 }, "result_metrics_average": 0.8231799251828895, "result_metrics_npm": 0.7241097388486535 } ]