mtasic85's picture
eval
5810e93
|
raw
history blame
10.1 kB
metadata
# Model card metadata: license, task type, and loading library for this model.
# NOTE(review): key names look like Hugging Face Hub card metadata — confirm
# against the Hub's schema before adding or renaming keys.
license: apache-2.0
pipeline_tag: text-generation
library_name: transformers
# Language codes declared for the model, one short code per entry.
language:
  - en
  - am
  - ar
  - as
  - az
  - be
  - bg
  - bn
  - br
  - bs
  - ca
  - cs
  - cy
  - da
  - de
  - el
  - eo
  - es
  - et
  - eu
  - fa
  - ff
  - fi
  - fr
  - fy
  - ga
  - gd
  - gl
  - gn
  - gu
  - ha
  - he
  - hi
  - hr
  - ht
  - hu
  - hy
  - id
  - ig
  - is
  - it
  - ja
  - jv
  - ka
  - kk
  - km
  - kn
  - ko
  - ku
  - ky
  - la
  - lg
  - li
  - ln
  - lo
  - lt
  - lv
  - mg
  - mk
  - ml
  - mn
  - mr
  - ms
  - my
  - ne
  - nl
  # Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 parsers.
  - 'no'
  # NOTE(review): `ns` is not a standard two-letter ISO 639-1 code; presumably
  # it mirrors a dataset's own label — confirm before changing.
  - ns
  - om
  - or
  - pa
  - pl
  - ps
  - pt
  - qu
  - rm
  - ro
  - ru
  - sa
  - si
  - sc
  - sd
  - sk
  - sl
  - so
  - sq
  - sr
  - ss
  - su
  - sv
  - sw
  - ta
  - te
  - th
  - tl
  - tn
  - tr
  - ug
  - uk
  - ur
  - uz
  - vi
  - wo
  - xh
  - yi
  - yo
  - zu
# Dataset identifiers in `owner/name` form.
# NOTE(review): presumably the datasets used for training — confirm against
# the training scripts.
datasets:
  - bigcode/programming-languages-keywords
  - bigcode/the-stack-smol-xs
  - nampdn-ai/tiny-textbooks
  - xu-song/cc100-samples
  - m-a-p/CodeFeedback-Filtered-Instruction
  - nampdn-ai/tiny-codes
  - ajibawa-2023/Maths-College
  - microsoft/orca-math-word-problems-200k
  - mlabonne/FineTome-100k
  - arcee-ai/agent-data
  - cognitivecomputations/SystemChat-2.0
  - badrex/llm-emoji-dataset
# Free-form tags attached to the card.
tags:
  - litgpt
  - litdata

tangled-llama-108m-32k-base-v0.1

loss, val_loss, val_ppl, epoch, learning_rate

lm-evaluation-harness

| Tasks | Version | Filter | n-shot | Metric | Value | | Stderr |
|---|--:|---|--:|---|--:|---|--:|
| arc_challenge | 1 | none | 0 | acc | 0.1937 | ± | 0.0115 |
| | | none | 0 | acc_norm | 0.2363 | ± | 0.0124 |
| gsm8k | 3 | flexible-extract | 5 | exact_match | 0.0136 | ± | 0.0032 |
| | | strict-match | 5 | exact_match | 0.0000 | ± | 0.0000 |
| hellaswag | 1 | none | 0 | acc | 0.2659 | ± | 0.0044 |
| | | none | 0 | acc_norm | 0.2709 | ± | 0.0044 |
| mmlu | 2 | none | | acc | 0.2309 | ± | 0.0036 |
| - humanities | 2 | none | | acc | 0.2370 | ± | 0.0062 |
| - formal_logic | 1 | none | 0 | acc | 0.2778 | ± | 0.0401 |
| - high_school_european_history | 1 | none | 0 | acc | 0.2303 | ± | 0.0329 |
| - high_school_us_history | 1 | none | 0 | acc | 0.2402 | ± | 0.0300 |
| - high_school_world_history | 1 | none | 0 | acc | 0.2405 | ± | 0.0278 |
| - international_law | 1 | none | 0 | acc | 0.1983 | ± | 0.0364 |
| - jurisprudence | 1 | none | 0 | acc | 0.2315 | ± | 0.0408 |
| - logical_fallacies | 1 | none | 0 | acc | 0.1840 | ± | 0.0304 |
| - moral_disputes | 1 | none | 0 | acc | 0.2110 | ± | 0.0220 |
| - moral_scenarios | 1 | none | 0 | acc | 0.2380 | ± | 0.0142 |
| - philosophy | 1 | none | 0 | acc | 0.1994 | ± | 0.0227 |
| - prehistory | 1 | none | 0 | acc | 0.2315 | ± | 0.0235 |
| - professional_law | 1 | none | 0 | acc | 0.2510 | ± | 0.0111 |
| - world_religions | 1 | none | 0 | acc | 0.2865 | ± | 0.0347 |
| - other | 2 | none | | acc | 0.2372 | ± | 0.0076 |
| - business_ethics | 1 | none | 0 | acc | 0.2900 | ± | 0.0456 |
| - clinical_knowledge | 1 | none | 0 | acc | 0.2113 | ± | 0.0251 |
| - college_medicine | 1 | none | 0 | acc | 0.2023 | ± | 0.0306 |
| - global_facts | 1 | none | 0 | acc | 0.1900 | ± | 0.0394 |
| - human_aging | 1 | none | 0 | acc | 0.3004 | ± | 0.0308 |
| - management | 1 | none | 0 | acc | 0.1748 | ± | 0.0376 |
| - marketing | 1 | none | 0 | acc | 0.2863 | ± | 0.0296 |
| - medical_genetics | 1 | none | 0 | acc | 0.2700 | ± | 0.0446 |
| - miscellaneous | 1 | none | 0 | acc | 0.2337 | ± | 0.0151 |
| - nutrition | 1 | none | 0 | acc | 0.2255 | ± | 0.0239 |
| - professional_accounting | 1 | none | 0 | acc | 0.2411 | ± | 0.0255 |
| - professional_medicine | 1 | none | 0 | acc | 0.1985 | ± | 0.0242 |
| - virology | 1 | none | 0 | acc | 0.2711 | ± | 0.0346 |
| - social sciences | 2 | none | | acc | 0.2278 | ± | 0.0076 |
| - econometrics | 1 | none | 0 | acc | 0.2105 | ± | 0.0384 |
| - high_school_geography | 1 | none | 0 | acc | 0.1768 | ± | 0.0272 |
| - high_school_government_and_politics | 1 | none | 0 | acc | 0.2280 | ± | 0.0303 |
| - high_school_macroeconomics | 1 | none | 0 | acc | 0.2436 | ± | 0.0218 |
| - high_school_microeconomics | 1 | none | 0 | acc | 0.2395 | ± | 0.0277 |
| - high_school_psychology | 1 | none | 0 | acc | 0.2037 | ± | 0.0173 |
| - human_sexuality | 1 | none | 0 | acc | 0.2595 | ± | 0.0384 |
| - professional_psychology | 1 | none | 0 | acc | 0.2386 | ± | 0.0172 |
| - public_relations | 1 | none | 0 | acc | 0.2091 | ± | 0.0390 |
| - security_studies | 1 | none | 0 | acc | 0.2490 | ± | 0.0277 |
| - sociology | 1 | none | 0 | acc | 0.1990 | ± | 0.0282 |
| - us_foreign_policy | 1 | none | 0 | acc | 0.3100 | ± | 0.0465 |
| - stem | 2 | none | | acc | 0.2185 | ± | 0.0074 |
| - abstract_algebra | 1 | none | 0 | acc | 0.2600 | ± | 0.0441 |
| - anatomy | 1 | none | 0 | acc | 0.1630 | ± | 0.0319 |
| - astronomy | 1 | none | 0 | acc | 0.2237 | ± | 0.0339 |
| - college_biology | 1 | none | 0 | acc | 0.2708 | ± | 0.0372 |
| - college_chemistry | 1 | none | 0 | acc | 0.2300 | ± | 0.0423 |
| - college_computer_science | 1 | none | 0 | acc | 0.2100 | ± | 0.0409 |
| - college_mathematics | 1 | none | 0 | acc | 0.2200 | ± | 0.0416 |
| - college_physics | 1 | none | 0 | acc | 0.2647 | ± | 0.0439 |
| - computer_security | 1 | none | 0 | acc | 0.3000 | ± | 0.0461 |
| - conceptual_physics | 1 | none | 0 | acc | 0.2000 | ± | 0.0261 |
| - electrical_engineering | 1 | none | 0 | acc | 0.2345 | ± | 0.0353 |
| - elementary_mathematics | 1 | none | 0 | acc | 0.2302 | ± | 0.0217 |
| - high_school_biology | 1 | none | 0 | acc | 0.1903 | ± | 0.0223 |
| - high_school_chemistry | 1 | none | 0 | acc | 0.1527 | ± | 0.0253 |
| - high_school_computer_science | 1 | none | 0 | acc | 0.2700 | ± | 0.0446 |
| - high_school_mathematics | 1 | none | 0 | acc | 0.1926 | ± | 0.0240 |
| - high_school_physics | 1 | none | 0 | acc | 0.2053 | ± | 0.0330 |
| - high_school_statistics | 1 | none | 0 | acc | 0.2130 | ± | 0.0279 |
| - machine_learning | 1 | none | 0 | acc | 0.2768 | ± | 0.0425 |
| truthfulqa_mc2 | 2 | none | 0 | acc | 0.4683 | ± | 0.0160 |
| winogrande | 1 | none | 0 | acc | 0.5075 | ± | 0.0141 |

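| Groups | Version | Filter | n-shot | Metric | Value | | Stderr |
|---|--:|---|--:|---|--:|---|--:|
| mmlu | 2 | none | | acc | 0.2309 | ± | 0.0036 |
| - humanities | 2 | none | | acc | 0.2370 | ± | 0.0062 |
| - other | 2 | none | | acc | 0.2372 | ± | 0.0076 |
| - social sciences | 2 | none | | acc | 0.2278 | ± | 0.0076 |
| - stem | 2 | none | | acc | 0.2185 | ± | 0.0074 |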