{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "100\n", "200\n", "300\n", "400\n", "500\n", "600\n", "700\n", "800\n", "900\n", "1000\n", "1100\n", "1200\n", "1300\n", "1400\n", "1500\n", "1600\n", "1700\n", "1800\n", "full_model_names\n", "1889\n", "organization_names\n", "12\n", "['Parameters', 'drop|3', 'gsm8k', 'MMLU_average', 'winogrande', 'all', 'arc:challenge|25', 'hellaswag|10', 'MMLU_abstract_algebra', 'MMLU_anatomy', 'MMLU_astronomy', 'MMLU_business_ethics', 'MMLU_clinical_knowledge', 'MMLU_college_biology', 'MMLU_college_chemistry', 'MMLU_college_computer_science', 'MMLU_college_mathematics', 'MMLU_college_medicine', 'MMLU_college_physics', 'MMLU_computer_security', 'MMLU_conceptual_physics', 'MMLU_econometrics', 'MMLU_electrical_engineering', 'MMLU_elementary_mathematics', 'MMLU_formal_logic', 'MMLU_global_facts', 'MMLU_high_school_biology', 'MMLU_high_school_chemistry', 'MMLU_high_school_computer_science', 'MMLU_high_school_european_history', 'MMLU_high_school_geography', 'MMLU_high_school_government_and_politics', 'MMLU_high_school_macroeconomics', 'MMLU_high_school_mathematics', 'MMLU_high_school_microeconomics', 'MMLU_high_school_physics', 'MMLU_high_school_psychology', 'MMLU_high_school_statistics', 'MMLU_high_school_us_history', 'MMLU_high_school_world_history', 'MMLU_human_aging', 'MMLU_human_sexuality', 'MMLU_international_law', 'MMLU_jurisprudence', 'MMLU_logical_fallacies', 'MMLU_machine_learning', 'MMLU_management', 'MMLU_marketing', 'MMLU_medical_genetics', 'MMLU_miscellaneous', 'MMLU_moral_disputes', 'MMLU_moral_scenarios', 'MMLU_nutrition', 'MMLU_philosophy', 'MMLU_prehistory', 'MMLU_professional_accounting', 'MMLU_professional_law', 'MMLU_professional_medicine', 'MMLU_professional_psychology', 'MMLU_public_relations', 'MMLU_security_studies', 'MMLU_sociology', 'MMLU_us_foreign_policy', 'MMLU_virology', 'MMLU_world_religions', 'truthfulqa:mc|0', 
'full_model_name']\n" ] } ], "source": [ "# Instantiate the project-local ResultDataProcessor; its constructor loads the result data\n", "# (the stdout above — progress counts, model/organization tallies, and the column list — is printed during construction).\n", "from result_data_processor import ResultDataProcessor\n", "result = ResultDataProcessor()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | URL | \n", "full_model_name | \n", "Parameters | \n", "MMLU_average | \n", "arc:challenge|25 | \n", "hellaswag|10 | \n", "MMLU_abstract_algebra | \n", "MMLU_anatomy | \n", "MMLU_astronomy | \n", "MMLU_business_ethics | \n", "... | \n", "MMLU_professional_accounting | \n", "MMLU_professional_law | \n", "MMLU_professional_medicine | \n", "MMLU_professional_psychology | \n", "MMLU_public_relations | \n", "MMLU_security_studies | \n", "MMLU_sociology | \n", "MMLU_us_foreign_policy | \n", "MMLU_virology | \n", "MMLU_world_religions | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SparseOPT-1.3B | \n", "https://huggingface.co/shaohang/SparseOPT-1.3B | \n", "shaohang/SparseOPT-1.3B | \n", "1.3 | \n", "0.255963 | \n", "0.240614 | \n", "0.383689 | \n", "0.22 | \n", "0.214815 | \n", "0.157895 | \n", "0.20 | \n", "... | \n", "0.262411 | \n", "0.238592 | \n", "0.448529 | \n", "0.254902 | \n", "0.236364 | \n", "0.171429 | \n", "0.228856 | \n", "0.27 | \n", "0.283133 | \n", "0.216374 | \n", "
Athena-v1 | \n", "https://huggingface.co/IkariDev/Athena-v1 | \n", "IkariDev/Athena-v1 | \n", "NaN | \n", "0.556052 | \n", "0.560580 | \n", "0.631548 | \n", "0.31 | \n", "0.496296 | \n", "0.526316 | \n", "0.58 | \n", "... | \n", "0.404255 | \n", "0.392438 | \n", "0.525735 | \n", "0.540850 | \n", "0.645455 | \n", "0.640816 | \n", "0.751244 | \n", "0.83 | \n", "0.493976 | \n", "0.725146 | \n", "
Athena-tmp | \n", "https://huggingface.co/IkariDev/Athena-tmp | \n", "IkariDev/Athena-tmp | \n", "NaN | \n", "0.588685 | \n", "0.567406 | \n", "0.621888 | \n", "0.29 | \n", "0.518519 | \n", "0.638158 | \n", "0.62 | \n", "... | \n", "0.450355 | \n", "0.462842 | \n", "0.569853 | \n", "0.588235 | \n", "0.645455 | \n", "0.653061 | \n", "0.721393 | \n", "0.81 | \n", "0.463855 | \n", "0.801170 | \n", "
13B-Legerdemain-L2 | \n", "https://huggingface.co/CalderaAI/13B-Legerdema... | \n", "CalderaAI/13B-Legerdemain-L2 | \n", "13.0 | \n", "0.560030 | \n", "0.573379 | \n", "0.635431 | \n", "0.36 | \n", "0.525926 | \n", "0.572368 | \n", "0.53 | \n", "... | \n", "0.429078 | \n", "0.424381 | \n", "0.522059 | \n", "0.532680 | \n", "0.609091 | \n", "0.636735 | \n", "0.766169 | \n", "0.87 | \n", "0.427711 | \n", "0.777778 | \n", "
13B-Ouroboros | \n", "https://huggingface.co/CalderaAI/13B-Ouroboros | \n", "CalderaAI/13B-Ouroboros | \n", "13.0 | \n", "0.514311 | \n", "0.560580 | \n", "0.624378 | \n", "0.31 | \n", "0.466667 | \n", "0.506579 | \n", "0.52 | \n", "... | \n", "0.365248 | \n", "0.405476 | \n", "0.481618 | \n", "0.524510 | \n", "0.609091 | \n", "0.538776 | \n", "0.691542 | \n", "0.83 | \n", "0.457831 | \n", "0.760234 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
Robin-v2 | \n", "https://huggingface.co/HanningZhang/Robin-v2 | \n", "HanningZhang/Robin-v2 | \n", "NaN | \n", "0.392680 | \n", "0.435154 | \n", "0.545310 | \n", "0.32 | \n", "0.437037 | \n", "0.335526 | \n", "0.46 | \n", "... | \n", "0.290780 | \n", "0.302477 | \n", "0.382353 | \n", "0.374183 | \n", "0.445455 | \n", "0.326531 | \n", "0.457711 | \n", "0.59 | \n", "0.379518 | \n", "0.590643 | \n", "
CodeUp-Llama-2-13b-chat-hf | \n", "https://huggingface.co/deepse/CodeUp-Llama-2-1... | \n", "deepse/CodeUp-Llama-2-13b-chat-hf | \n", "13.0 | \n", "0.546262 | \n", "0.558020 | \n", "0.629257 | \n", "0.31 | \n", "0.474074 | \n", "0.546053 | \n", "0.53 | \n", "... | \n", "0.390071 | \n", "0.391786 | \n", "0.500000 | \n", "0.544118 | \n", "0.663636 | \n", "0.636735 | \n", "0.751244 | \n", "0.81 | \n", "0.481928 | \n", "0.730994 | \n", "
Hermes-Platypus2-mini-7B | \n", "https://huggingface.co/edor/Hermes-Platypus2-m... | \n", "edor/Hermes-Platypus2-mini-7B | \n", "7.0 | \n", "0.470828 | \n", "0.523038 | \n", "0.601573 | \n", "0.33 | \n", "0.488889 | \n", "0.421053 | \n", "0.48 | \n", "... | \n", "0.390071 | \n", "0.353977 | \n", "0.470588 | \n", "0.446078 | \n", "0.518182 | \n", "0.563265 | \n", "0.621891 | \n", "0.68 | \n", "0.421687 | \n", "0.637427 | \n", "
Stable-Platypus2-mini-7B | \n", "https://huggingface.co/edor/Stable-Platypus2-m... | \n", "edor/Stable-Platypus2-mini-7B | \n", "7.0 | \n", "0.517800 | \n", "0.523891 | \n", "0.596594 | \n", "0.37 | \n", "0.488889 | \n", "0.407895 | \n", "0.50 | \n", "... | \n", "0.390071 | \n", "0.391786 | \n", "0.518382 | \n", "0.509804 | \n", "0.618182 | \n", "0.657143 | \n", "0.631841 | \n", "0.73 | \n", "0.427711 | \n", "0.695906 | \n", "
llava-v1.5-13b-hf | \n", "https://huggingface.co/Community-LM/llava-v1.5... | \n", "Community-LM/llava-v1.5-13b-hf | \n", "13.0 | \n", "0.568868 | \n", "0.532423 | \n", "0.601175 | \n", "0.30 | \n", "0.496296 | \n", "0.585526 | \n", "0.67 | \n", "... | \n", "0.407801 | \n", "0.415906 | \n", "0.547794 | \n", "0.578431 | \n", "0.600000 | \n", "0.653061 | \n", "0.761194 | \n", "0.81 | \n", "0.506024 | \n", "0.795322 | \n", "
1121 rows × 63 columns
\n", "