{
  "metadata": {
    "model_name": "llama-3.3-70b-instruct",
    "timestamp": "20241212-013440"
  },
  "categories": {
    "STEM": {
      "subjects": [
        {"subject": "high_school_chemistry", "correct_rate": 64.03940886699507},
        {"subject": "high_school_mathematics", "correct_rate": 46.666666666666664},
        {"subject": "abstract_algebra", "correct_rate": 48.0},
        {"subject": "computer_security", "correct_rate": 84.0},
        {"subject": "college_computer_science", "correct_rate": 61.61616161616161},
        {"subject": "college_chemistry", "correct_rate": 53.0},
        {"subject": "conceptual_physics", "correct_rate": 74.8936170212766},
        {"subject": "high_school_statistics", "correct_rate": 68.05555555555556},
        {"subject": "college_mathematics", "correct_rate": 44.0},
        {"subject": "college_biology", "correct_rate": 88.19444444444444},
        {"subject": "college_physics", "correct_rate": 52.94117647058824},
        {"subject": "elementary_mathematics", "correct_rate": 64.81481481481481},
        {"subject": "high_school_biology", "correct_rate": 88.70967741935483},
        {"subject": "high_school_physics", "correct_rate": 57.615894039735096},
        {"subject": "machine_learning", "correct_rate": 56.25},
        {"subject": "astronomy", "correct_rate": 88.1578947368421},
        {"subject": "electrical_engineering", "correct_rate": 69.6551724137931},
        {"subject": "high_school_computer_science", "correct_rate": 79.0}
      ],
      "correct_rate": 66.09
    },
    "humanities": {
      "subjects": [
        {"subject": "world_religions", "correct_rate": 84.7953216374269},
        {"subject": "high_school_us_history", "correct_rate": 89.70588235294117},
        {"subject": "moral_disputes", "correct_rate": 77.74566473988439},
        {"subject": "high_school_world_history", "correct_rate": 88.60759493670885},
        {"subject": "formal_logic", "correct_rate": 62.698412698412696},
        {"subject": "international_law", "correct_rate": 85.12396694214877},
        {"subject": "jurisprudence", "correct_rate": 76.85185185185185},
        {"subject": "professional_law", "correct_rate": 59.58279009126467},
        {"subject": "logical_fallacies", "correct_rate": 83.43558282208589},
        {"subject": "philosophy", "correct_rate": 74.27652733118971},
        {"subject": "moral_scenarios", "correct_rate": 78.65921787709496},
        {"subject": "prehistory", "correct_rate": 84.25925925925925},
        {"subject": "high_school_european_history", "correct_rate": 84.84848484848484}
      ],
      "correct_rate": 79.28
    },
    "social sciences": {
      "subjects": [
        {"subject": "high_school_geography", "correct_rate": 86.36363636363636},
        {"subject": "high_school_psychology", "correct_rate": 91.19266055045871},
        {"subject": "sociology", "correct_rate": 87.56218905472637},
        {"subject": "high_school_microeconomics", "correct_rate": 86.5546218487395},
        {"subject": "professional_psychology", "correct_rate": 76.79738562091504},
        {"subject": "security_studies", "correct_rate": 77.55102040816327},
        {"subject": "us_foreign_policy", "correct_rate": 91.0},
        {"subject": "public_relations", "correct_rate": 70.9090909090909},
        {"subject": "high_school_government_and_politics", "correct_rate": 93.78238341968913},
        {"subject": "econometrics", "correct_rate": 61.40350877192983},
        {"subject": "human_sexuality", "correct_rate": 81.67938931297711},
        {"subject": "high_school_macroeconomics", "correct_rate": 80.51282051282051}
      ],
      "correct_rate": 82.11
    },
    "other (business, health, misc.)": {
      "subjects": [
        {"subject": "virology", "correct_rate": 53.6144578313253},
        {"subject": "college_medicine", "correct_rate": 72.25433526011561},
        {"subject": "global_facts", "correct_rate": 62.0},
        {"subject": "miscellaneous", "correct_rate": 87.35632183908046},
        {"subject": "medical_genetics", "correct_rate": 84.0},
        {"subject": "human_aging", "correct_rate": 78.47533632286996},
        {"subject": "nutrition", "correct_rate": 83.33333333333334},
        {"subject": "marketing", "correct_rate": 88.88888888888889},
        {"subject": "anatomy", "correct_rate": 71.85185185185186},
        {"subject": "professional_medicine", "correct_rate": 88.23529411764706},
        {"subject": "professional_accounting", "correct_rate": 56.02836879432624},
        {"subject": "management", "correct_rate": 82.52427184466019},
        {"subject": "clinical_knowledge", "correct_rate": 80.75471698113208},
        {"subject": "business_ethics", "correct_rate": 74.0}
      ],
      "correct_rate": 75.95
    }
  },
  "subjects": {
    "high_school_geography": {"correct_rate": 86.36363636363636, "category": "social sciences"},
    "virology": {"correct_rate": 53.6144578313253, "category": "other (business, health, misc.)"},
    "world_religions": {"correct_rate": 84.7953216374269, "category": "humanities"},
    "college_medicine": {"correct_rate": 72.25433526011561, "category": "other (business, health, misc.)"},
    "high_school_chemistry": {"correct_rate": 64.03940886699507, "category": "STEM"},
    "high_school_mathematics": {"correct_rate": 46.666666666666664, "category": "STEM"},
    "global_facts": {"correct_rate": 62.0, "category": "other (business, health, misc.)"},
    "high_school_psychology": {"correct_rate": 91.19266055045871, "category": "social sciences"},
    "abstract_algebra": {"correct_rate": 48.0, "category": "STEM"},
    "computer_security": {"correct_rate": 84.0, "category": "STEM"},
    "sociology": {"correct_rate": 87.56218905472637, "category": "social sciences"},
    "college_computer_science": {"correct_rate": 61.61616161616161, "category": "STEM"},
    "high_school_us_history": {"correct_rate": 89.70588235294117, "category": "humanities"},
    "miscellaneous": {"correct_rate": 87.35632183908046, "category": "other (business, health, misc.)"},
    "high_school_microeconomics": {"correct_rate": 86.5546218487395, "category": "social sciences"},
    "college_chemistry": {"correct_rate": 53.0, "category": "STEM"},
    "moral_disputes": {"correct_rate": 77.74566473988439, "category": "humanities"},
    "medical_genetics": {"correct_rate": 84.0, "category": "other (business, health, misc.)"},
    "professional_psychology": {"correct_rate": 76.79738562091504, "category": "social sciences"},
    "high_school_world_history": {"correct_rate": 88.60759493670885, "category": "humanities"},
    "security_studies": {"correct_rate": 77.55102040816327, "category": "social sciences"},
    "conceptual_physics": {"correct_rate": 74.8936170212766, "category": "STEM"},
    "us_foreign_policy": {"correct_rate": 91.0, "category": "social sciences"},
    "human_aging": {"correct_rate": 78.47533632286996, "category": "other (business, health, misc.)"},
    "public_relations": {"correct_rate": 70.9090909090909, "category": "social sciences"},
    "high_school_statistics": {"correct_rate": 68.05555555555556, "category": "STEM"},
    "formal_logic": {"correct_rate": 62.698412698412696, "category": "humanities"},
    "nutrition": {"correct_rate": 83.33333333333334, "category": "other (business, health, misc.)"},
    "marketing": {"correct_rate": 88.88888888888889, "category": "other (business, health, misc.)"},
    "college_mathematics": {"correct_rate": 44.0, "category": "STEM"},
    "international_law": {"correct_rate": 85.12396694214877, "category": "humanities"},
    "anatomy": {"correct_rate": 71.85185185185186, "category": "other (business, health, misc.)"},
    "jurisprudence": {"correct_rate": 76.85185185185185, "category": "humanities"},
    "college_biology": {"correct_rate": 88.19444444444444, "category": "STEM"},
    "college_physics": {"correct_rate": 52.94117647058824, "category": "STEM"},
    "professional_law": {"correct_rate": 59.58279009126467, "category": "humanities"},
    "high_school_government_and_politics": {"correct_rate": 93.78238341968913, "category": "social sciences"},
    "professional_medicine": {"correct_rate": 88.23529411764706, "category": "other (business, health, misc.)"},
    "logical_fallacies": {"correct_rate": 83.43558282208589, "category": "humanities"},
    "professional_accounting": {"correct_rate": 56.02836879432624, "category": "other (business, health, misc.)"},
    "philosophy": {"correct_rate": 74.27652733118971, "category": "humanities"},
    "moral_scenarios": {"correct_rate": 78.65921787709496, "category": "humanities"},
    "management": {"correct_rate": 82.52427184466019, "category": "other (business, health, misc.)"},
    "elementary_mathematics": {"correct_rate": 64.81481481481481, "category": "STEM"},
    "prehistory": {"correct_rate": 84.25925925925925, "category": "humanities"},
    "high_school_biology": {"correct_rate": 88.70967741935483, "category": "STEM"},
    "high_school_physics": {"correct_rate": 57.615894039735096, "category": "STEM"},
    "machine_learning": {"correct_rate": 56.25, "category": "STEM"},
    "high_school_european_history": {"correct_rate": 84.84848484848484, "category": "humanities"},
    "econometrics": {"correct_rate": 61.40350877192983, "category": "social sciences"},
    "clinical_knowledge": {"correct_rate": 80.75471698113208, "category": "other (business, health, misc.)"},
    "human_sexuality": {"correct_rate": 81.67938931297711, "category": "social sciences"},
    "high_school_macroeconomics": {"correct_rate": 80.51282051282051, "category": "social sciences"},
    "astronomy": {"correct_rate": 88.1578947368421, "category": "STEM"},
    "electrical_engineering": {"correct_rate": 69.6551724137931, "category": "STEM"},
    "high_school_computer_science": {"correct_rate": 79.0, "category": "STEM"},
    "business_ethics": {"correct_rate": 74.0, "category": "other (business, health, misc.)"}
  },
  "overall_correct_rate": 74.89
}