{ "metadata": { "model_name": "llama-3.3-70b-instruct", "timestamp": "20241212-013440" }, "categories": { "STEM": { "subjects": [ { "subject": "high_school_chemistry", "correct_rate": 64.03940886699507 }, { "subject": "high_school_mathematics", "correct_rate": 46.666666666666664 }, { "subject": "abstract_algebra", "correct_rate": 48.0 }, { "subject": "computer_security", "correct_rate": 84.0 }, { "subject": "college_computer_science", "correct_rate": 61.61616161616161 }, { "subject": "college_chemistry", "correct_rate": 53.0 }, { "subject": "conceptual_physics", "correct_rate": 74.8936170212766 }, { "subject": "high_school_statistics", "correct_rate": 68.05555555555556 }, { "subject": "college_mathematics", "correct_rate": 44.0 }, { "subject": "college_biology", "correct_rate": 88.19444444444444 }, { "subject": "college_physics", "correct_rate": 52.94117647058824 }, { "subject": "elementary_mathematics", "correct_rate": 64.81481481481481 }, { "subject": "high_school_biology", "correct_rate": 88.70967741935483 }, { "subject": "high_school_physics", "correct_rate": 57.615894039735096 }, { "subject": "machine_learning", "correct_rate": 56.25 }, { "subject": "astronomy", "correct_rate": 88.1578947368421 }, { "subject": "electrical_engineering", "correct_rate": 69.6551724137931 }, { "subject": "high_school_computer_science", "correct_rate": 79.0 } ], "correct_rate": 66.09 }, "humanities": { "subjects": [ { "subject": "world_religions", "correct_rate": 84.7953216374269 }, { "subject": "high_school_us_history", "correct_rate": 89.70588235294117 }, { "subject": "moral_disputes", "correct_rate": 77.74566473988439 }, { "subject": "high_school_world_history", "correct_rate": 88.60759493670885 }, { "subject": "formal_logic", "correct_rate": 62.698412698412696 }, { "subject": "international_law", "correct_rate": 85.12396694214877 }, { "subject": "jurisprudence", "correct_rate": 76.85185185185185 }, { "subject": "professional_law", "correct_rate": 59.58279009126467 }, { "subject": "logical_fallacies", "correct_rate": 83.43558282208589 }, { "subject": "philosophy", "correct_rate": 74.27652733118971 }, { "subject": "moral_scenarios", "correct_rate": 78.65921787709496 }, { "subject": "prehistory", "correct_rate": 84.25925925925925 }, { "subject": "high_school_european_history", "correct_rate": 84.84848484848484 } ], "correct_rate": 79.28 }, "social sciences": { "subjects": [ { "subject": "high_school_geography", "correct_rate": 86.36363636363636 }, { "subject": "high_school_psychology", "correct_rate": 91.19266055045871 }, { "subject": "sociology", "correct_rate": 87.56218905472637 }, { "subject": "high_school_microeconomics", "correct_rate": 86.5546218487395 }, { "subject": "professional_psychology", "correct_rate": 76.79738562091504 }, { "subject": "security_studies", "correct_rate": 77.55102040816327 }, { "subject": "us_foreign_policy", "correct_rate": 91.0 }, { "subject": "public_relations", "correct_rate": 70.9090909090909 }, { "subject": "high_school_government_and_politics", "correct_rate": 93.78238341968913 }, { "subject": "econometrics", "correct_rate": 61.40350877192983 }, { "subject": "human_sexuality", "correct_rate": 81.67938931297711 }, { "subject": "high_school_macroeconomics", "correct_rate": 80.51282051282051 } ], "correct_rate": 82.11 }, "other (business, health, misc.)": { "subjects": [ { "subject": "virology", "correct_rate": 53.6144578313253 }, { "subject": "college_medicine", "correct_rate": 72.25433526011561 }, { "subject": "global_facts", "correct_rate": 62.0 }, { "subject": "miscellaneous", "correct_rate": 87.35632183908046 }, { "subject": "medical_genetics", "correct_rate": 84.0 }, { "subject": "human_aging", "correct_rate": 78.47533632286996 }, { "subject": "nutrition", "correct_rate": 83.33333333333334 }, { "subject": "marketing", "correct_rate": 88.88888888888889 }, { "subject": "anatomy", "correct_rate": 71.85185185185186 }, { "subject": "professional_medicine", "correct_rate": 88.23529411764706 }, { "subject": "professional_accounting", "correct_rate": 56.02836879432624 }, { "subject": "management", "correct_rate": 82.52427184466019 }, { "subject": "clinical_knowledge", "correct_rate": 80.75471698113208 }, { "subject": "business_ethics", "correct_rate": 74.0 } ], "correct_rate": 75.95 } }, "subjects": { "high_school_geography": { "correct_rate": 86.36363636363636, "category": "social sciences" }, "virology": { "correct_rate": 53.6144578313253, "category": "other (business, health, misc.)" }, "world_religions": { "correct_rate": 84.7953216374269, "category": "humanities" }, "college_medicine": { "correct_rate": 72.25433526011561, "category": "other (business, health, misc.)" }, "high_school_chemistry": { "correct_rate": 64.03940886699507, "category": "STEM" }, "high_school_mathematics": { "correct_rate": 46.666666666666664, "category": "STEM" }, "global_facts": { "correct_rate": 62.0, "category": "other (business, health, misc.)" }, "high_school_psychology": { "correct_rate": 91.19266055045871, "category": "social sciences" }, "abstract_algebra": { "correct_rate": 48.0, "category": "STEM" }, "computer_security": { "correct_rate": 84.0, "category": "STEM" }, "sociology": { "correct_rate": 87.56218905472637, "category": "social sciences" }, "college_computer_science": { "correct_rate": 61.61616161616161, "category": "STEM" }, "high_school_us_history": { "correct_rate": 89.70588235294117, "category": "humanities" }, "miscellaneous": { "correct_rate": 87.35632183908046, "category": "other (business, health, misc.)" }, "high_school_microeconomics": { "correct_rate": 86.5546218487395, "category": "social sciences" }, "college_chemistry": { "correct_rate": 53.0, "category": "STEM" }, "moral_disputes": { "correct_rate": 77.74566473988439, "category": "humanities" }, "medical_genetics": { "correct_rate": 84.0, "category": "other (business, health, misc.)" }, "professional_psychology": { "correct_rate": 76.79738562091504, "category": "social sciences" }, "high_school_world_history": { "correct_rate": 88.60759493670885, "category": "humanities" }, "security_studies": { "correct_rate": 77.55102040816327, "category": "social sciences" }, "conceptual_physics": { "correct_rate": 74.8936170212766, "category": "STEM" }, "us_foreign_policy": { "correct_rate": 91.0, "category": "social sciences" }, "human_aging": { "correct_rate": 78.47533632286996, "category": "other (business, health, misc.)" }, "public_relations": { "correct_rate": 70.9090909090909, "category": "social sciences" }, "high_school_statistics": { "correct_rate": 68.05555555555556, "category": "STEM" }, "formal_logic": { "correct_rate": 62.698412698412696, "category": "humanities" }, "nutrition": { "correct_rate": 83.33333333333334, "category": "other (business, health, misc.)" }, "marketing": { "correct_rate": 88.88888888888889, "category": "other (business, health, misc.)" }, "college_mathematics": { "correct_rate": 44.0, "category": "STEM" }, "international_law": { "correct_rate": 85.12396694214877, "category": "humanities" }, "anatomy": { "correct_rate": 71.85185185185186, "category": "other (business, health, misc.)" }, "jurisprudence": { "correct_rate": 76.85185185185185, "category": "humanities" }, "college_biology": { "correct_rate": 88.19444444444444, "category": "STEM" }, "college_physics": { "correct_rate": 52.94117647058824, "category": "STEM" }, "professional_law": { "correct_rate": 59.58279009126467, "category": "humanities" }, "high_school_government_and_politics": { "correct_rate": 93.78238341968913, "category": "social sciences" }, "professional_medicine": { "correct_rate": 88.23529411764706, "category": "other (business, health, misc.)" }, "logical_fallacies": { "correct_rate": 83.43558282208589, "category": "humanities" }, "professional_accounting": { "correct_rate": 56.02836879432624, "category": "other (business, health, misc.)" }, "philosophy": { "correct_rate": 74.27652733118971, "category": "humanities" }, "moral_scenarios": { "correct_rate": 78.65921787709496, "category": "humanities" }, "management": { "correct_rate": 82.52427184466019, "category": "other (business, health, misc.)" }, "elementary_mathematics": { "correct_rate": 64.81481481481481, "category": "STEM" }, "prehistory": { "correct_rate": 84.25925925925925, "category": "humanities" }, "high_school_biology": { "correct_rate": 88.70967741935483, "category": "STEM" }, "high_school_physics": { "correct_rate": 57.615894039735096, "category": "STEM" }, "machine_learning": { "correct_rate": 56.25, "category": "STEM" }, "high_school_european_history": { "correct_rate": 84.84848484848484, "category": "humanities" }, "econometrics": { "correct_rate": 61.40350877192983, "category": "social sciences" }, "clinical_knowledge": { "correct_rate": 80.75471698113208, "category": "other (business, health, misc.)" }, "human_sexuality": { "correct_rate": 81.67938931297711, "category": "social sciences" }, "high_school_macroeconomics": { "correct_rate": 80.51282051282051, "category": "social sciences" }, "astronomy": { "correct_rate": 88.1578947368421, "category": "STEM" }, "electrical_engineering": { "correct_rate": 69.6551724137931, "category": "STEM" }, "high_school_computer_science": { "correct_rate": 79.0, "category": "STEM" }, "business_ethics": { "correct_rate": 74.0, "category": "other (business, health, misc.)" } }, "overall_correct_rate": 74.89 }