LLaMA3-8B-RoLoRA / mmlu_W6_A6_RTN.log
ScarletAce's picture
Upload folder using huggingface_hub
53a2780 verified
| Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
|---------------------------------------|-------|------|-----:|------|-----:|---|-----:|
|mmlu |N/A |none | 0|acc |0.5940|± |0.0039|
| - humanities |N/A |none | 0|acc |0.5269|± |0.0067|
| - formal_logic | 0|none | 0|acc |0.4365|± |0.0444|
| - high_school_european_history | 0|none | 0|acc |0.6727|± |0.0366|
| - high_school_us_history | 0|none | 0|acc |0.7598|± |0.0300|
| - high_school_world_history | 0|none | 0|acc |0.7890|± |0.0266|
| - international_law | 0|none | 0|acc |0.7769|± |0.0380|
| - jurisprudence | 0|none | 0|acc |0.7222|± |0.0433|
| - logical_fallacies | 0|none | 0|acc |0.6748|± |0.0368|
| - moral_disputes | 0|none | 0|acc |0.6647|± |0.0254|
| - moral_scenarios | 0|none | 0|acc |0.2380|± |0.0142|
| - philosophy | 0|none | 0|acc |0.6720|± |0.0267|
| - prehistory | 0|none | 0|acc |0.7037|± |0.0254|
| - professional_law | 0|none | 0|acc |0.4374|± |0.0127|
| - world_religions | 0|none | 0|acc |0.8070|± |0.0303|
| - other |N/A |none | 0|acc |0.6746|± |0.0081|
| - business_ethics | 0|none | 0|acc |0.5600|± |0.0499|
| - clinical_knowledge | 0|none | 0|acc |0.7057|± |0.0280|
| - college_medicine | 0|none | 0|acc |0.5954|± |0.0374|
| - global_facts | 0|none | 0|acc |0.3500|± |0.0479|
| - human_aging | 0|none | 0|acc |0.6816|± |0.0313|
| - management | 0|none | 0|acc |0.8252|± |0.0376|
| - marketing | 0|none | 0|acc |0.8419|± |0.0239|
| - medical_genetics | 0|none | 0|acc |0.7500|± |0.0435|
| - miscellaneous | 0|none | 0|acc |0.7739|± |0.0150|
| - nutrition | 0|none | 0|acc |0.7026|± |0.0262|
| - professional_accounting | 0|none | 0|acc |0.4362|± |0.0296|
| - professional_medicine | 0|none | 0|acc |0.6581|± |0.0288|
| - virology | 0|none | 0|acc |0.5000|± |0.0389|
| - social_sciences |N/A |none | 0|acc |0.7004|± |0.0081|
| - econometrics | 0|none | 0|acc |0.3421|± |0.0446|
| - high_school_geography | 0|none | 0|acc |0.7475|± |0.0310|
| - high_school_government_and_politics| 0|none | 0|acc |0.8187|± |0.0278|
| - high_school_macroeconomics | 0|none | 0|acc |0.6051|± |0.0248|
| - high_school_microeconomics | 0|none | 0|acc |0.6555|± |0.0309|
| - high_school_psychology | 0|none | 0|acc |0.7908|± |0.0174|
| - human_sexuality | 0|none | 0|acc |0.7023|± |0.0401|
| - professional_psychology | 0|none | 0|acc |0.6503|± |0.0193|
| - public_relations | 0|none | 0|acc |0.6545|± |0.0455|
| - security_studies | 0|none | 0|acc |0.7143|± |0.0289|
| - sociology | 0|none | 0|acc |0.8060|± |0.0280|
| - us_foreign_policy | 0|none | 0|acc |0.8800|± |0.0327|
| - stem |N/A |none | 0|acc |0.5109|± |0.0086|
| - abstract_algebra | 0|none | 0|acc |0.3300|± |0.0473|
| - anatomy | 0|none | 0|acc |0.6296|± |0.0417|
| - astronomy | 0|none | 0|acc |0.6842|± |0.0378|
| - college_biology | 0|none | 0|acc |0.7292|± |0.0372|
| - college_chemistry | 0|none | 0|acc |0.4500|± |0.0500|
| - college_computer_science | 0|none | 0|acc |0.4700|± |0.0502|
| - college_mathematics | 0|none | 0|acc |0.4300|± |0.0498|
| - college_physics | 0|none | 0|acc |0.4118|± |0.0490|
| - computer_security | 0|none | 0|acc |0.6900|± |0.0465|
| - conceptual_physics | 0|none | 0|acc |0.4936|± |0.0327|
| - electrical_engineering | 0|none | 0|acc |0.5448|± |0.0415|
| - elementary_mathematics | 0|none | 0|acc |0.4206|± |0.0254|
| - high_school_biology | 0|none | 0|acc |0.7032|± |0.0260|
| - high_school_chemistry | 0|none | 0|acc |0.5172|± |0.0352|
| - high_school_computer_science | 0|none | 0|acc |0.6500|± |0.0479|
| - high_school_mathematics | 0|none | 0|acc |0.3222|± |0.0285|
| - high_school_physics | 0|none | 0|acc |0.4106|± |0.0402|
| - high_school_statistics | 0|none | 0|acc |0.5093|± |0.0341|
| - machine_learning | 0|none | 0|acc |0.3304|± |0.0446|