Llama-3.3-70B-Instruct_Q2_K.gguf / (MMLU) results_20241212-013440.json
{
"metadata": {
"model_name": "llama-3.3-70b-instruct",
"timestamp": "20241212-013440"
},
"categories": {
"STEM": {
"subjects": [
{
"subject": "high_school_chemistry",
"correct_rate": 64.03940886699507
},
{
"subject": "high_school_mathematics",
"correct_rate": 46.666666666666664
},
{
"subject": "abstract_algebra",
"correct_rate": 48.0
},
{
"subject": "computer_security",
"correct_rate": 84.0
},
{
"subject": "college_computer_science",
"correct_rate": 61.61616161616161
},
{
"subject": "college_chemistry",
"correct_rate": 53.0
},
{
"subject": "conceptual_physics",
"correct_rate": 74.8936170212766
},
{
"subject": "high_school_statistics",
"correct_rate": 68.05555555555556
},
{
"subject": "college_mathematics",
"correct_rate": 44.0
},
{
"subject": "college_biology",
"correct_rate": 88.19444444444444
},
{
"subject": "college_physics",
"correct_rate": 52.94117647058824
},
{
"subject": "elementary_mathematics",
"correct_rate": 64.81481481481481
},
{
"subject": "high_school_biology",
"correct_rate": 88.70967741935483
},
{
"subject": "high_school_physics",
"correct_rate": 57.615894039735096
},
{
"subject": "machine_learning",
"correct_rate": 56.25
},
{
"subject": "astronomy",
"correct_rate": 88.1578947368421
},
{
"subject": "electrical_engineering",
"correct_rate": 69.6551724137931
},
{
"subject": "high_school_computer_science",
"correct_rate": 79.0
}
],
"correct_rate": 66.09
},
"humanities": {
"subjects": [
{
"subject": "world_religions",
"correct_rate": 84.7953216374269
},
{
"subject": "high_school_us_history",
"correct_rate": 89.70588235294117
},
{
"subject": "moral_disputes",
"correct_rate": 77.74566473988439
},
{
"subject": "high_school_world_history",
"correct_rate": 88.60759493670885
},
{
"subject": "formal_logic",
"correct_rate": 62.698412698412696
},
{
"subject": "international_law",
"correct_rate": 85.12396694214877
},
{
"subject": "jurisprudence",
"correct_rate": 76.85185185185185
},
{
"subject": "professional_law",
"correct_rate": 59.58279009126467
},
{
"subject": "logical_fallacies",
"correct_rate": 83.43558282208589
},
{
"subject": "philosophy",
"correct_rate": 74.27652733118971
},
{
"subject": "moral_scenarios",
"correct_rate": 78.65921787709496
},
{
"subject": "prehistory",
"correct_rate": 84.25925925925925
},
{
"subject": "high_school_european_history",
"correct_rate": 84.84848484848484
}
],
"correct_rate": 79.28
},
"social sciences": {
"subjects": [
{
"subject": "high_school_geography",
"correct_rate": 86.36363636363636
},
{
"subject": "high_school_psychology",
"correct_rate": 91.19266055045871
},
{
"subject": "sociology",
"correct_rate": 87.56218905472637
},
{
"subject": "high_school_microeconomics",
"correct_rate": 86.5546218487395
},
{
"subject": "professional_psychology",
"correct_rate": 76.79738562091504
},
{
"subject": "security_studies",
"correct_rate": 77.55102040816327
},
{
"subject": "us_foreign_policy",
"correct_rate": 91.0
},
{
"subject": "public_relations",
"correct_rate": 70.9090909090909
},
{
"subject": "high_school_government_and_politics",
"correct_rate": 93.78238341968913
},
{
"subject": "econometrics",
"correct_rate": 61.40350877192983
},
{
"subject": "human_sexuality",
"correct_rate": 81.67938931297711
},
{
"subject": "high_school_macroeconomics",
"correct_rate": 80.51282051282051
}
],
"correct_rate": 82.11
},
"other (business, health, misc.)": {
"subjects": [
{
"subject": "virology",
"correct_rate": 53.6144578313253
},
{
"subject": "college_medicine",
"correct_rate": 72.25433526011561
},
{
"subject": "global_facts",
"correct_rate": 62.0
},
{
"subject": "miscellaneous",
"correct_rate": 87.35632183908046
},
{
"subject": "medical_genetics",
"correct_rate": 84.0
},
{
"subject": "human_aging",
"correct_rate": 78.47533632286996
},
{
"subject": "nutrition",
"correct_rate": 83.33333333333334
},
{
"subject": "marketing",
"correct_rate": 88.88888888888889
},
{
"subject": "anatomy",
"correct_rate": 71.85185185185186
},
{
"subject": "professional_medicine",
"correct_rate": 88.23529411764706
},
{
"subject": "professional_accounting",
"correct_rate": 56.02836879432624
},
{
"subject": "management",
"correct_rate": 82.52427184466019
},
{
"subject": "clinical_knowledge",
"correct_rate": 80.75471698113208
},
{
"subject": "business_ethics",
"correct_rate": 74.0
}
],
"correct_rate": 75.95
}
},
"subjects": {
"high_school_geography": {
"correct_rate": 86.36363636363636,
"category": "social sciences"
},
"virology": {
"correct_rate": 53.6144578313253,
"category": "other (business, health, misc.)"
},
"world_religions": {
"correct_rate": 84.7953216374269,
"category": "humanities"
},
"college_medicine": {
"correct_rate": 72.25433526011561,
"category": "other (business, health, misc.)"
},
"high_school_chemistry": {
"correct_rate": 64.03940886699507,
"category": "STEM"
},
"high_school_mathematics": {
"correct_rate": 46.666666666666664,
"category": "STEM"
},
"global_facts": {
"correct_rate": 62.0,
"category": "other (business, health, misc.)"
},
"high_school_psychology": {
"correct_rate": 91.19266055045871,
"category": "social sciences"
},
"abstract_algebra": {
"correct_rate": 48.0,
"category": "STEM"
},
"computer_security": {
"correct_rate": 84.0,
"category": "STEM"
},
"sociology": {
"correct_rate": 87.56218905472637,
"category": "social sciences"
},
"college_computer_science": {
"correct_rate": 61.61616161616161,
"category": "STEM"
},
"high_school_us_history": {
"correct_rate": 89.70588235294117,
"category": "humanities"
},
"miscellaneous": {
"correct_rate": 87.35632183908046,
"category": "other (business, health, misc.)"
},
"high_school_microeconomics": {
"correct_rate": 86.5546218487395,
"category": "social sciences"
},
"college_chemistry": {
"correct_rate": 53.0,
"category": "STEM"
},
"moral_disputes": {
"correct_rate": 77.74566473988439,
"category": "humanities"
},
"medical_genetics": {
"correct_rate": 84.0,
"category": "other (business, health, misc.)"
},
"professional_psychology": {
"correct_rate": 76.79738562091504,
"category": "social sciences"
},
"high_school_world_history": {
"correct_rate": 88.60759493670885,
"category": "humanities"
},
"security_studies": {
"correct_rate": 77.55102040816327,
"category": "social sciences"
},
"conceptual_physics": {
"correct_rate": 74.8936170212766,
"category": "STEM"
},
"us_foreign_policy": {
"correct_rate": 91.0,
"category": "social sciences"
},
"human_aging": {
"correct_rate": 78.47533632286996,
"category": "other (business, health, misc.)"
},
"public_relations": {
"correct_rate": 70.9090909090909,
"category": "social sciences"
},
"high_school_statistics": {
"correct_rate": 68.05555555555556,
"category": "STEM"
},
"formal_logic": {
"correct_rate": 62.698412698412696,
"category": "humanities"
},
"nutrition": {
"correct_rate": 83.33333333333334,
"category": "other (business, health, misc.)"
},
"marketing": {
"correct_rate": 88.88888888888889,
"category": "other (business, health, misc.)"
},
"college_mathematics": {
"correct_rate": 44.0,
"category": "STEM"
},
"international_law": {
"correct_rate": 85.12396694214877,
"category": "humanities"
},
"anatomy": {
"correct_rate": 71.85185185185186,
"category": "other (business, health, misc.)"
},
"jurisprudence": {
"correct_rate": 76.85185185185185,
"category": "humanities"
},
"college_biology": {
"correct_rate": 88.19444444444444,
"category": "STEM"
},
"college_physics": {
"correct_rate": 52.94117647058824,
"category": "STEM"
},
"professional_law": {
"correct_rate": 59.58279009126467,
"category": "humanities"
},
"high_school_government_and_politics": {
"correct_rate": 93.78238341968913,
"category": "social sciences"
},
"professional_medicine": {
"correct_rate": 88.23529411764706,
"category": "other (business, health, misc.)"
},
"logical_fallacies": {
"correct_rate": 83.43558282208589,
"category": "humanities"
},
"professional_accounting": {
"correct_rate": 56.02836879432624,
"category": "other (business, health, misc.)"
},
"philosophy": {
"correct_rate": 74.27652733118971,
"category": "humanities"
},
"moral_scenarios": {
"correct_rate": 78.65921787709496,
"category": "humanities"
},
"management": {
"correct_rate": 82.52427184466019,
"category": "other (business, health, misc.)"
},
"elementary_mathematics": {
"correct_rate": 64.81481481481481,
"category": "STEM"
},
"prehistory": {
"correct_rate": 84.25925925925925,
"category": "humanities"
},
"high_school_biology": {
"correct_rate": 88.70967741935483,
"category": "STEM"
},
"high_school_physics": {
"correct_rate": 57.615894039735096,
"category": "STEM"
},
"machine_learning": {
"correct_rate": 56.25,
"category": "STEM"
},
"high_school_european_history": {
"correct_rate": 84.84848484848484,
"category": "humanities"
},
"econometrics": {
"correct_rate": 61.40350877192983,
"category": "social sciences"
},
"clinical_knowledge": {
"correct_rate": 80.75471698113208,
"category": "other (business, health, misc.)"
},
"human_sexuality": {
"correct_rate": 81.67938931297711,
"category": "social sciences"
},
"high_school_macroeconomics": {
"correct_rate": 80.51282051282051,
"category": "social sciences"
},
"astronomy": {
"correct_rate": 88.1578947368421,
"category": "STEM"
},
"electrical_engineering": {
"correct_rate": 69.6551724137931,
"category": "STEM"
},
"high_school_computer_science": {
"correct_rate": 79.0,
"category": "STEM"
},
"business_ethics": {
"correct_rate": 74.0,
"category": "other (business, health, misc.)"
}
},
"overall_correct_rate": 74.89
}
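
The rollups in this file appear to be unweighted averages: each category's `correct_rate` matches the mean of its subjects' rates, and `overall_correct_rate` matches the mean across all 57 subjects (not the mean of the four category rates). A minimal Python sketch to recompute and check them; the filename is taken from the header above, so adjust the path to wherever the file is saved locally:

```python
import json

# Assumed local path; this is the filename from the header above.
with open("results_20241212-013440.json") as f:
    results = json.load(f)

all_rates = []
for name, cat in results["categories"].items():
    # Recompute the category rollup as the unweighted mean of its subjects.
    rates = [s["correct_rate"] for s in cat["subjects"]]
    all_rates.extend(rates)
    mean = sum(rates) / len(rates)
    print(f"{name}: reported {cat['correct_rate']}, "
          f"recomputed {mean:.2f} over {len(rates)} subjects")

# The overall figure matches the unweighted mean over all 57 subjects.
overall = sum(all_rates) / len(all_rates)
print(f"overall: reported {results['overall_correct_rate']}, "
      f"recomputed {overall:.2f}")
```

Note that this differs from question-weighted MMLU scoring: subjects with few questions (e.g. abstract_algebra) count the same as large ones (e.g. professional_law) in these rollups.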