gagan3012 committed on
Commit 9fc026b
1 Parent(s): 06308e5

Update README.md

Files changed (1)
  1. README.md +407 -0
README.md CHANGED
@@ -30,4 +30,411 @@ parameters:
  value: [1, 0.5, 0.7, 0.3, 0]
  - value: 0.5
  dtype: bfloat16
  ```
+
+ # Dataset Card for Evaluation run of gagan3012/MetaModel
+
+ <!-- Provide a quick summary of the dataset. -->
+
+ Dataset automatically created during the evaluation run of model [gagan3012/MetaModel](https://huggingface.co/gagan3012/MetaModel) on the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
+
+ The dataset is composed of 63 configurations, each one corresponding to one of the evaluated tasks.
+
+ The dataset has been created from 1 run(s). Each run can be found as a specific split in each configuration, the split being named using the timestamp of the run. The "train" split always points to the latest results.
+
+ An additional configuration "results" stores all the aggregated results of the run (and is used to compute and display the aggregated metrics on the [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)).
+
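Before picking a configuration, it can help to enumerate what the repo exposes. This is a minimal sketch using the standard `datasets` helpers; the config name `harness_winogrande_5` is just the example used later in this card, and the exact split names depend on which runs have been uploaded.

```python
from datasets import get_dataset_config_names, get_dataset_split_names

repo = "open-llm-leaderboard/details_gagan3012__MetaModel"

# One configuration per evaluated task (63 in total, per the card).
configs = get_dataset_config_names(repo)
print(len(configs))

# Splits of a single task config: one per run timestamp, with "train"
# pointing at the latest results (per the description above).
print(get_dataset_split_names(repo, "harness_winogrande_5"))
```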
+ To load the details from a run, you can, for instance, do the following:
+ ```python
+ from datasets import load_dataset
+ data = load_dataset("open-llm-leaderboard/details_gagan3012__MetaModel",
+                     "harness_winogrande_5",
+                     split="train")
+ ```
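The object returned above is a regular `datasets.Dataset`; its exact columns vary with the task and harness version, so a quick inspection (a sketch, not part of the original card) is a reasonable first step:

```python
# Row count and schema differ per task, so look before indexing into the data.
print(len(data), data.column_names)
print(data[0])  # first evaluated example for this task
```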
+
+ ## Latest results
+
+ These are the [latest results from run 2024-01-04T14:09:43.780941](https://huggingface.co/datasets/open-llm-leaderboard/details_gagan3012__MetaModel/blob/main/results_2024-01-04T14-09-43.780941.json) (note that there might be results for other tasks in the repo if successive evals didn't cover the same tasks; you can find each of them in the results and the "latest" split for each eval):
+
+ ```python
+ {
+     "all": {
+         "acc": 0.6664380298886512,
+         "acc_stderr": 0.031642195230944255,
+         "acc_norm": 0.6671639222858992,
+         "acc_norm_stderr": 0.03228745343467652,
+         "mc1": 0.5691554467564259,
+         "mc1_stderr": 0.01733527247533237,
+         "mc2": 0.7184177934834866,
+         "mc2_stderr": 0.014995634120330182
+     },
+     "harness|arc:challenge|25": {
+         "acc": 0.6843003412969283,
+         "acc_stderr": 0.013582571095815291,
+         "acc_norm": 0.7107508532423208,
+         "acc_norm_stderr": 0.01325001257939344
+     },
+     "harness|hellaswag|10": {
+         "acc": 0.7132045409281019,
+         "acc_stderr": 0.004513409114983828,
+         "acc_norm": 0.8844851623182632,
+         "acc_norm_stderr": 0.0031898897894046684
+     },
+     "harness|hendrycksTest-abstract_algebra|5": {
+         "acc": 0.43,
+         "acc_stderr": 0.049756985195624284,
+         "acc_norm": 0.43,
+         "acc_norm_stderr": 0.049756985195624284
+     },
+     "harness|hendrycksTest-anatomy|5": {
+         "acc": 0.6148148148148148,
+         "acc_stderr": 0.04203921040156279,
+         "acc_norm": 0.6148148148148148,
+         "acc_norm_stderr": 0.04203921040156279
+     },
+     "harness|hendrycksTest-astronomy|5": {
+         "acc": 0.743421052631579,
+         "acc_stderr": 0.0355418036802569,
+         "acc_norm": 0.743421052631579,
+         "acc_norm_stderr": 0.0355418036802569
+     },
+     "harness|hendrycksTest-business_ethics|5": {
+         "acc": 0.75,
+         "acc_stderr": 0.04351941398892446,
+         "acc_norm": 0.75,
+         "acc_norm_stderr": 0.04351941398892446
+     },
+     "harness|hendrycksTest-clinical_knowledge|5": {
+         "acc": 0.6830188679245283,
+         "acc_stderr": 0.02863723563980089,
+         "acc_norm": 0.6830188679245283,
+         "acc_norm_stderr": 0.02863723563980089
+     },
+     "harness|hendrycksTest-college_biology|5": {
+         "acc": 0.7638888888888888,
+         "acc_stderr": 0.03551446610810826,
+         "acc_norm": 0.7638888888888888,
+         "acc_norm_stderr": 0.03551446610810826
+     },
+     "harness|hendrycksTest-college_chemistry|5": {
+         "acc": 0.47,
+         "acc_stderr": 0.050161355804659205,
+         "acc_norm": 0.47,
+         "acc_norm_stderr": 0.050161355804659205
+     },
+     "harness|hendrycksTest-college_computer_science|5": {
+         "acc": 0.48,
+         "acc_stderr": 0.05021167315686781,
+         "acc_norm": 0.48,
+         "acc_norm_stderr": 0.05021167315686781
+     },
+     "harness|hendrycksTest-college_mathematics|5": {
+         "acc": 0.32,
+         "acc_stderr": 0.046882617226215034,
+         "acc_norm": 0.32,
+         "acc_norm_stderr": 0.046882617226215034
+     },
+     "harness|hendrycksTest-college_medicine|5": {
+         "acc": 0.6647398843930635,
+         "acc_stderr": 0.03599586301247077,
+         "acc_norm": 0.6647398843930635,
+         "acc_norm_stderr": 0.03599586301247077
+     },
+     "harness|hendrycksTest-college_physics|5": {
+         "acc": 0.38235294117647056,
+         "acc_stderr": 0.04835503696107223,
+         "acc_norm": 0.38235294117647056,
+         "acc_norm_stderr": 0.04835503696107223
+     },
+     "harness|hendrycksTest-computer_security|5": {
+         "acc": 0.75,
+         "acc_stderr": 0.04351941398892446,
+         "acc_norm": 0.75,
+         "acc_norm_stderr": 0.04351941398892446
+     },
+     "harness|hendrycksTest-conceptual_physics|5": {
+         "acc": 0.625531914893617,
+         "acc_stderr": 0.03163910665367291,
+         "acc_norm": 0.625531914893617,
+         "acc_norm_stderr": 0.03163910665367291
+     },
+     "harness|hendrycksTest-econometrics|5": {
+         "acc": 0.4824561403508772,
+         "acc_stderr": 0.04700708033551038,
+         "acc_norm": 0.4824561403508772,
+         "acc_norm_stderr": 0.04700708033551038
+     },
+     "harness|hendrycksTest-electrical_engineering|5": {
+         "acc": 0.6413793103448275,
+         "acc_stderr": 0.039966295748767186,
+         "acc_norm": 0.6413793103448275,
+         "acc_norm_stderr": 0.039966295748767186
+     },
+     "harness|hendrycksTest-elementary_mathematics|5": {
+         "acc": 0.5,
+         "acc_stderr": 0.025751310131230234,
+         "acc_norm": 0.5,
+         "acc_norm_stderr": 0.025751310131230234
+     },
+     "harness|hendrycksTest-formal_logic|5": {
+         "acc": 0.42857142857142855,
+         "acc_stderr": 0.0442626668137991,
+         "acc_norm": 0.42857142857142855,
+         "acc_norm_stderr": 0.0442626668137991
+     },
+     "harness|hendrycksTest-global_facts|5": {
+         "acc": 0.35,
+         "acc_stderr": 0.047937248544110196,
+         "acc_norm": 0.35,
+         "acc_norm_stderr": 0.047937248544110196
+     },
+     "harness|hendrycksTest-high_school_biology|5": {
+         "acc": 0.8129032258064516,
+         "acc_stderr": 0.022185710092252252,
+         "acc_norm": 0.8129032258064516,
+         "acc_norm_stderr": 0.022185710092252252
+     },
+     "harness|hendrycksTest-high_school_chemistry|5": {
+         "acc": 0.5073891625615764,
+         "acc_stderr": 0.035176035403610105,
+         "acc_norm": 0.5073891625615764,
+         "acc_norm_stderr": 0.035176035403610105
+     },
+     "harness|hendrycksTest-high_school_computer_science|5": {
+         "acc": 0.72,
+         "acc_stderr": 0.04512608598542128,
+         "acc_norm": 0.72,
+         "acc_norm_stderr": 0.04512608598542128
+     },
+     "harness|hendrycksTest-high_school_european_history|5": {
+         "acc": 0.8121212121212121,
+         "acc_stderr": 0.03050193405942914,
+         "acc_norm": 0.8121212121212121,
+         "acc_norm_stderr": 0.03050193405942914
+     },
+     "harness|hendrycksTest-high_school_geography|5": {
+         "acc": 0.8636363636363636,
+         "acc_stderr": 0.024450155973189835,
+         "acc_norm": 0.8636363636363636,
+         "acc_norm_stderr": 0.024450155973189835
+     },
+     "harness|hendrycksTest-high_school_government_and_politics|5": {
+         "acc": 0.8963730569948186,
+         "acc_stderr": 0.021995311963644244,
+         "acc_norm": 0.8963730569948186,
+         "acc_norm_stderr": 0.021995311963644244
+     },
+     "harness|hendrycksTest-high_school_macroeconomics|5": {
+         "acc": 0.6692307692307692,
+         "acc_stderr": 0.02385479568097114,
+         "acc_norm": 0.6692307692307692,
+         "acc_norm_stderr": 0.02385479568097114
+     },
+     "harness|hendrycksTest-high_school_mathematics|5": {
+         "acc": 0.37037037037037035,
+         "acc_stderr": 0.02944316932303154,
+         "acc_norm": 0.37037037037037035,
+         "acc_norm_stderr": 0.02944316932303154
+     },
+     "harness|hendrycksTest-high_school_microeconomics|5": {
+         "acc": 0.7142857142857143,
+         "acc_stderr": 0.029344572500634332,
+         "acc_norm": 0.7142857142857143,
+         "acc_norm_stderr": 0.029344572500634332
+     },
+     "harness|hendrycksTest-high_school_physics|5": {
+         "acc": 0.3708609271523179,
+         "acc_stderr": 0.03943966699183629,
+         "acc_norm": 0.3708609271523179,
+         "acc_norm_stderr": 0.03943966699183629
+     },
+     "harness|hendrycksTest-high_school_psychology|5": {
+         "acc": 0.8422018348623853,
+         "acc_stderr": 0.01563002297009246,
+         "acc_norm": 0.8422018348623853,
+         "acc_norm_stderr": 0.01563002297009246
+     },
+     "harness|hendrycksTest-high_school_statistics|5": {
+         "acc": 0.5740740740740741,
+         "acc_stderr": 0.03372343271653062,
+         "acc_norm": 0.5740740740740741,
+         "acc_norm_stderr": 0.03372343271653062
+     },
+     "harness|hendrycksTest-high_school_us_history|5": {
+         "acc": 0.8578431372549019,
+         "acc_stderr": 0.02450980392156862,
+         "acc_norm": 0.8578431372549019,
+         "acc_norm_stderr": 0.02450980392156862
+     },
+     "harness|hendrycksTest-high_school_world_history|5": {
+         "acc": 0.8565400843881856,
+         "acc_stderr": 0.022818291821017012,
+         "acc_norm": 0.8565400843881856,
+         "acc_norm_stderr": 0.022818291821017012
+     },
+     "harness|hendrycksTest-human_aging|5": {
+         "acc": 0.672645739910314,
+         "acc_stderr": 0.03149384670994131,
+         "acc_norm": 0.672645739910314,
+         "acc_norm_stderr": 0.03149384670994131
+     },
+     "harness|hendrycksTest-human_sexuality|5": {
+         "acc": 0.7557251908396947,
+         "acc_stderr": 0.03768335959728743,
+         "acc_norm": 0.7557251908396947,
+         "acc_norm_stderr": 0.03768335959728743
+     },
+     "harness|hendrycksTest-international_law|5": {
+         "acc": 0.7851239669421488,
+         "acc_stderr": 0.037494924487096966,
+         "acc_norm": 0.7851239669421488,
+         "acc_norm_stderr": 0.037494924487096966
+     },
+     "harness|hendrycksTest-jurisprudence|5": {
+         "acc": 0.8055555555555556,
+         "acc_stderr": 0.038260763248848646,
+         "acc_norm": 0.8055555555555556,
+         "acc_norm_stderr": 0.038260763248848646
+     },
+     "harness|hendrycksTest-logical_fallacies|5": {
+         "acc": 0.754601226993865,
+         "acc_stderr": 0.03380939813943354,
+         "acc_norm": 0.754601226993865,
+         "acc_norm_stderr": 0.03380939813943354
+     },
+     "harness|hendrycksTest-machine_learning|5": {
+         "acc": 0.4732142857142857,
+         "acc_stderr": 0.047389751192741546,
+         "acc_norm": 0.4732142857142857,
+         "acc_norm_stderr": 0.047389751192741546
+     },
+     "harness|hendrycksTest-management|5": {
+         "acc": 0.8446601941747572,
+         "acc_stderr": 0.035865947385739734,
+         "acc_norm": 0.8446601941747572,
+         "acc_norm_stderr": 0.035865947385739734
+     },
+     "harness|hendrycksTest-marketing|5": {
+         "acc": 0.8589743589743589,
+         "acc_stderr": 0.02280138253459753,
+         "acc_norm": 0.8589743589743589,
+         "acc_norm_stderr": 0.02280138253459753
+     },
+     "harness|hendrycksTest-medical_genetics|5": {
+         "acc": 0.7,
+         "acc_stderr": 0.046056618647183814,
+         "acc_norm": 0.7,
+         "acc_norm_stderr": 0.046056618647183814
+     },
+     "harness|hendrycksTest-miscellaneous|5": {
+         "acc": 0.8084291187739464,
+         "acc_stderr": 0.014072859310451949,
+         "acc_norm": 0.8084291187739464,
+         "acc_norm_stderr": 0.014072859310451949
+     },
+     "harness|hendrycksTest-moral_disputes|5": {
+         "acc": 0.7572254335260116,
+         "acc_stderr": 0.023083658586984204,
+         "acc_norm": 0.7572254335260116,
+         "acc_norm_stderr": 0.023083658586984204
+     },
+     "harness|hendrycksTest-moral_scenarios|5": {
+         "acc": 0.39664804469273746,
+         "acc_stderr": 0.016361354769822468,
+         "acc_norm": 0.39664804469273746,
+         "acc_norm_stderr": 0.016361354769822468
+     },
+     "harness|hendrycksTest-nutrition|5": {
+         "acc": 0.7581699346405228,
+         "acc_stderr": 0.024518195641879334,
+         "acc_norm": 0.7581699346405228,
+         "acc_norm_stderr": 0.024518195641879334
+     },
+     "harness|hendrycksTest-philosophy|5": {
+         "acc": 0.7202572347266881,
+         "acc_stderr": 0.025494259350694905,
+         "acc_norm": 0.7202572347266881,
+         "acc_norm_stderr": 0.025494259350694905
+     },
+     "harness|hendrycksTest-prehistory|5": {
+         "acc": 0.7777777777777778,
+         "acc_stderr": 0.02313237623454333,
+         "acc_norm": 0.7777777777777778,
+         "acc_norm_stderr": 0.02313237623454333
+     },
+     "harness|hendrycksTest-professional_accounting|5": {
+         "acc": 0.5035460992907801,
+         "acc_stderr": 0.02982674915328092,
+         "acc_norm": 0.5035460992907801,
+         "acc_norm_stderr": 0.02982674915328092
+     },
+     "harness|hendrycksTest-professional_law|5": {
+         "acc": 0.49478487614080835,
+         "acc_stderr": 0.012769541449652547,
+         "acc_norm": 0.49478487614080835,
+         "acc_norm_stderr": 0.012769541449652547
+     },
+     "harness|hendrycksTest-professional_medicine|5": {
+         "acc": 0.75,
+         "acc_stderr": 0.026303648393696036,
+         "acc_norm": 0.75,
+         "acc_norm_stderr": 0.026303648393696036
+     },
+     "harness|hendrycksTest-professional_psychology|5": {
+         "acc": 0.6813725490196079,
+         "acc_stderr": 0.018850084696468712,
+         "acc_norm": 0.6813725490196079,
+         "acc_norm_stderr": 0.018850084696468712
+     },
+     "harness|hendrycksTest-public_relations|5": {
+         "acc": 0.6818181818181818,
+         "acc_stderr": 0.04461272175910509,
+         "acc_norm": 0.6818181818181818,
+         "acc_norm_stderr": 0.04461272175910509
+     },
+     "harness|hendrycksTest-security_studies|5": {
+         "acc": 0.746938775510204,
+         "acc_stderr": 0.027833023871399677,
+         "acc_norm": 0.746938775510204,
+         "acc_norm_stderr": 0.027833023871399677
+     },
+     "harness|hendrycksTest-sociology|5": {
+         "acc": 0.8258706467661692,
+         "acc_stderr": 0.026814951200421603,
+         "acc_norm": 0.8258706467661692,
+         "acc_norm_stderr": 0.026814951200421603
+     },
+     "harness|hendrycksTest-us_foreign_policy|5": {
+         "acc": 0.91,
+         "acc_stderr": 0.028762349126466125,
+         "acc_norm": 0.91,
+         "acc_norm_stderr": 0.028762349126466125
+     },
+     "harness|hendrycksTest-virology|5": {
+         "acc": 0.5783132530120482,
+         "acc_stderr": 0.038444531817709175,
+         "acc_norm": 0.5783132530120482,
+         "acc_norm_stderr": 0.038444531817709175
+     },
+     "harness|hendrycksTest-world_religions|5": {
+         "acc": 0.7777777777777778,
+         "acc_stderr": 0.03188578017686398,
+         "acc_norm": 0.7777777777777778,
+         "acc_norm_stderr": 0.03188578017686398
+     },
+     "harness|truthfulqa:mc|0": {
+         "mc1": 0.5691554467564259,
+         "mc1_stderr": 0.01733527247533237,
+         "mc2": 0.7184177934834866,
+         "mc2_stderr": 0.014995634120330182
+     },
+     "harness|winogrande|5": {
+         "acc": 0.8342541436464088,
+         "acc_stderr": 0.010450899545370632
+     },
+     "harness|gsm8k|5": {
+         "acc": 0.6535253980288097,
+         "acc_stderr": 0.013107179054313398
+     }
+ }
+ ```
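For programmatic access to these aggregated numbers, the JSON file linked above can be pulled straight from the dataset repo; the repo id and filename below are taken from that link. This is a minimal sketch: whether the per-task metrics sit at the top level as shown in the snippet or under a "results" key can vary with the harness version, so the code falls back accordingly.

```python
import json

from huggingface_hub import hf_hub_download

# Download the raw results file referenced in the "Latest results" link above.
path = hf_hub_download(
    repo_id="open-llm-leaderboard/details_gagan3012__MetaModel",
    filename="results_2024-01-04T14-09-43.780941.json",
    repo_type="dataset",
)
with open(path) as f:
    raw = json.load(f)

# Metrics may be nested under a top-level "results" key depending on the
# harness version; otherwise assume the flat layout shown in the snippet.
metrics = raw.get("results", raw)

print("average acc: ", metrics["all"]["acc"])
print("ARC acc_norm:", metrics["harness|arc:challenge|25"]["acc_norm"])
print("GSM8K acc:   ", metrics["harness|gsm8k|5"]["acc"])
```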