grg committed on
Commit
348bacd
1 Parent(s): 2be5d2e

Adding llama-3.1-nemotron-70B-instruct and fixing hermes_3.

Browse files
static/leaderboard.csv CHANGED
@@ -1,35 +1,36 @@
1
  Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,CFI,SRMR,RMSEA
2
- hermes_3_llama_3.1_8b_instruct,0.4638047138047138,0.4117221943281449,0.16520527634373441,0.25311021741644446,0.5822944444444444,0.3526722222222223,0.3435555555555555
3
- gemma-2-2b-it,0.3733164983164983,0.3309858600428668,0.14746606707946294,0.263080165752695,0.40932500000000005,0.550475,0.5377472222222222
4
- gemma-2-9b-it,0.7432659932659933,0.6020857503693501,0.43782539244147833,0.20116278903333318,0.7543666666666667,0.23989444444444452,0.24792499999999995
5
- gemma-2-27b-it,0.6372053872053871,0.5270946699366518,0.3917304045417486,0.2058170364515589,0.5997861111111111,0.37121111111111116,0.37292222222222227
6
- phi-3-mini-128k-instruct,0.3253367003367003,0.32984992817164005,0.039299993295009855,0.281800547806919,0.5861361111111111,0.42524166666666674,0.3974944444444444
7
- phi-3-medium-128k-instruct,0.3282828282828283,0.30802986933853177,0.09692037989916814,0.2651981204439735,0.43025555555555556,0.5503277777777777,0.5381722222222222
8
- phi-3.5-mini-instruct,0.24200336700336697,0.2680653144619754,0.0361229186530762,0.28422749224983457,0.40715555555555555,0.5721138888888888,0.5507833333333333
9
- phi-3.5-MoE-instruct,0.39520202020202017,0.36128192067041315,0.10985291697837646,0.2739229692168671,0.5530944444444444,0.4248777777777778,0.40345
10
- Mistral-7B-Instruct-v0.1,0.2175925925925926,0.26609566354811315,0.027216280472015988,0.2829498135031582,0.38917777777777773,0.5561138888888888,0.530213888888889
11
- Mistral-7B-Instruct-v0.2,0.35058922558922556,0.32133832899241477,0.14417876497818388,0.265188983528973,0.3802722222222222,0.5727305555555555,0.5483611111111111
12
- Mistral-7B-Instruct-v0.3,0.2558922558922559,0.26572479479146804,0.07960539866974455,0.2742399030139009,0.31385,0.6241,0.6081333333333333
13
- Mixtral-8x7B-Instruct-v0.1,0.44486531986531985,0.3819009850972602,0.21473356319081474,0.2624402608740656,0.45275,0.5034666666666667,0.4905694444444444
14
- Mixtral-8x22B-Instruct-v0.1,0.3421717171717171,0.31529864972153404,0.1414001940345544,0.2548838005881672,0.3772361111111111,0.5810888888888889,0.5844750000000001
15
- command_r_plus,0.5892255892255892,0.4995356672762356,0.3429686514651868,0.23811982320641845,0.6033000000000001,0.3740166666666668,0.3667527777777777
16
- llama_3_8b_instruct,0.49915824915824913,0.4295836112681494,0.24527785038654715,0.245806400289881,0.5498222222222222,0.42656388888888896,0.42189444444444446
17
- llama_3_70b_instruct,0.7882996632996634,0.6839540364836003,0.607020698814379,0.18525883672204868,0.7210055555555557,0.2346083333333333,0.25758888888888887
18
- llama_3.1_8b_instruct,0.5795454545454546,0.4786874422110324,0.4295080949846363,0.22060228669473025,0.4305722222222223,0.5455027777777777,0.553
19
- llama_3.1_70b_instruct,0.8282828282828283,0.7172545013390067,0.691365862744007,0.1709718847084183,0.6979472222222223,0.2636777777777777,0.2907250000000001
20
- llama_3.1_405b_instruct_4bit,0.7394781144781145,0.6490864350383405,0.7232098126552619,0.1702199925365422,0.4875722222222223,0.4963444444444445,0.5211555555555556
21
- llama_3.2_1b_instruct,0.21506734006734007,0.2522036562381785,0.027192115495770382,0.29255310096654275,0.37450000000000006,0.5990222222222223,0.5740638888888888
22
- llama_3.2_3b_instruct,0.3985690235690236,0.3615804465210719,0.13450325180647235,0.27485276839064654,0.5017,0.44956666666666667,0.4226500000000001
23
- Qwen2-7B-Instruct,0.41498316498316495,0.36370005127542027,0.25108519506513916,0.25776537005719313,0.3560861111111111,0.6009722222222222,0.5920888888888889
24
- Qwen2-72B-Instruct,0.5829124579124579,0.5461212335522644,0.6465993243020925,0.20297742879025626,0.3045,0.6543138888888889,0.6646361111111111
25
- Qwen2.5-0.5B-Instruct,0.2882996632996633,0.3005554090516966,0.002970456550606876,0.2928913315666324,0.5371250000000001,0.44709722222222226,0.404575
26
- Qwen2.5-7B-Instruct,0.622053872053872,0.5163098181421168,0.333554494486959,0.2505866550331236,0.6473694444444444,0.30400277777777773,0.29651944444444434
27
- Qwen2.5-32B-Instruct,0.7411616161616161,0.656917654644944,0.6724190751477237,0.1806656189868978,0.5603222222222223,0.40237500000000004,0.41161666666666663
28
- Qwen2.5-72B-Instruct,0.8274410774410774,0.7104489147495714,0.6974116787371809,0.16176650806326276,0.6734583333333333,0.2993,0.3184472222222223
29
- gpt-3.5-turbo-0125,0.24452861952861948,0.28218378886707396,0.08240359836763214,0.28728574920060357,0.3873055555555555,0.599925,0.572238888888889
30
- gpt-4o-0513,0.6973905723905723,0.5989532974661671,0.5122163952167618,0.19201420113771173,0.6235416666666667,0.34458611111111115,0.3441805555555555
31
- gpt-4o-mini-2024-07-18,0.37542087542087543,0.3418785071827972,0.13575309046266867,0.2707065266105181,0.44214722222222214,0.5004583333333332,0.47896666666666665
32
- Mistral-Large-Instruct-2407,0.8455387205387205,0.7374229691535793,0.7644582301049158,0.16944638941325085,0.6510750000000001,0.31028611111111104,0.3297916666666667
33
- Mistral-Nemo-Instruct-2407,0.5904882154882155,0.5262426956484347,0.4414072595011627,0.21142636170606344,0.5161,0.42923055555555545,0.43113055555555546
34
- Mistral-Small-Instruct-2409,0.7815656565656567,0.6890378862258165,0.6416815833333804,0.1894343546381,0.6840472222222221,0.2601583333333335,0.2888777777777778
35
- dummy,0.18855218855218855,0.2291015386716794,-0.009004148398032956,0.2928877637010999,0.3755222222222222,0.622275,0.5915305555555557
 
 
1
  Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,CFI,SRMR,RMSEA
2
+ llama-3.1-nemotron-70B-instruct,0.8696895424836603,0.751782963334874,0.7174031652092134,0.16209339860230643,0.7561694444444446,0.21189444444444439,0.23753055555555547
3
+ hermes_3_llama_3.1_8b,0.4534313725490196,0.4117221943281449,0.16520527634373441,0.25311021741644446,0.5822944444444444,0.3526722222222223,0.3435555555555555
4
+ gemma-2-2b-it,0.3635620915032679,0.3309858600428668,0.14746606707946294,0.263080165752695,0.40932500000000005,0.550475,0.5377472222222222
5
+ gemma-2-9b-it,0.7287581699346405,0.6020857503693501,0.43782539244147833,0.20116278903333318,0.7543666666666667,0.23989444444444452,0.24792499999999995
6
+ gemma-2-27b-it,0.6225490196078431,0.5270946699366518,0.3917304045417486,0.2058170364515589,0.5997861111111111,0.37121111111111116,0.37292222222222227
7
+ phi-3-mini-128k-instruct,0.31862745098039214,0.32984992817164005,0.039299993295009855,0.281800547806919,0.5861361111111111,0.42524166666666674,0.3974944444444444
8
+ phi-3-medium-128k-instruct,0.3198529411764706,0.30802986933853177,0.09692037989916814,0.2651981204439735,0.43025555555555556,0.5503277777777777,0.5381722222222222
9
+ phi-3.5-mini-instruct,0.23651960784313728,0.2680653144619754,0.0361229186530762,0.28422749224983457,0.40715555555555555,0.5721138888888888,0.5507833333333333
10
+ phi-3.5-MoE-instruct,0.38480392156862747,0.36128192067041315,0.10985291697837646,0.2739229692168671,0.5530944444444444,0.4248777777777778,0.40345
11
+ Mistral-7B-Instruct-v0.1,0.2128267973856209,0.26609566354811315,0.027216280472015988,0.2829498135031582,0.38917777777777773,0.5561138888888888,0.530213888888889
12
+ Mistral-7B-Instruct-v0.2,0.3415032679738562,0.32133832899241477,0.14417876497818388,0.265188983528973,0.3802722222222222,0.5727305555555555,0.5483611111111111
13
+ Mistral-7B-Instruct-v0.3,0.25,0.26572479479146804,0.07960539866974455,0.2742399030139009,0.31385,0.6241,0.6081333333333333
14
+ Mixtral-8x7B-Instruct-v0.1,0.4334150326797386,0.3819009850972602,0.21473356319081474,0.2624402608740656,0.45275,0.5034666666666667,0.4905694444444444
15
+ Mixtral-8x22B-Instruct-v0.1,0.3349673202614379,0.31529864972153404,0.1414001940345544,0.2548838005881672,0.3772361111111111,0.5810888888888889,0.5844750000000001
16
+ command_r_plus,0.5755718954248366,0.4995356672762356,0.3429686514651868,0.23811982320641845,0.6033000000000001,0.3740166666666668,0.3667527777777777
17
+ llama_3_8b_instruct,0.48815359477124187,0.4295836112681494,0.24527785038654715,0.245806400289881,0.5498222222222222,0.42656388888888896,0.42189444444444446
18
+ llama_3_70b_instruct,0.770016339869281,0.6839540364836003,0.607020698814379,0.18525883672204868,0.7210055555555557,0.2346083333333333,0.25758888888888887
19
+ llama_3.1_8b_instruct,0.5637254901960785,0.4786874422110324,0.4295080949846363,0.22060228669473025,0.4305722222222223,0.5455027777777777,0.553
20
+ llama_3.1_70b_instruct,0.8112745098039216,0.7172545013390067,0.691365862744007,0.1709718847084183,0.6979472222222223,0.2636777777777777,0.2907250000000001
21
+ llama_3.1_405b_instruct_4bit,0.7283496732026143,0.6490864350383405,0.7232098126552619,0.1702199925365422,0.4875722222222223,0.4963444444444445,0.5211555555555556
22
+ llama_3.2_1b_instruct,0.2107843137254902,0.2522036562381785,0.027192115495770382,0.29255310096654275,0.37450000000000006,0.5990222222222223,0.5740638888888888
23
+ llama_3.2_3b_instruct,0.38929738562091504,0.3615804465210719,0.13450325180647235,0.27485276839064654,0.5017,0.44956666666666667,0.4226500000000001
24
+ Qwen2-7B-Instruct,0.4035947712418301,0.36370005127542027,0.25108519506513916,0.25776537005719313,0.3560861111111111,0.6009722222222222,0.5920888888888889
25
+ Qwen2-72B-Instruct,0.5690359477124183,0.5461212335522644,0.6465993243020925,0.20297742879025626,0.3045,0.6543138888888889,0.6646361111111111
26
+ Qwen2.5-0.5B-Instruct,0.2822712418300653,0.3005554090516966,0.002970456550606876,0.2928913315666324,0.5371250000000001,0.44709722222222226,0.404575
27
+ Qwen2.5-7B-Instruct,0.6070261437908496,0.5163098181421168,0.333554494486959,0.2505866550331236,0.6473694444444444,0.30400277777777773,0.29651944444444434
28
+ Qwen2.5-32B-Instruct,0.7263071895424837,0.656917654644944,0.6724190751477237,0.1806656189868978,0.5603222222222223,0.40237500000000004,0.41161666666666663
29
+ Qwen2.5-72B-Instruct,0.8149509803921569,0.7104489147495714,0.6974116787371809,0.16176650806326276,0.6734583333333333,0.2993,0.3184472222222223
30
+ gpt-3.5-turbo-0125,0.23856209150326796,0.28218378886707396,0.08240359836763214,0.28728574920060357,0.3873055555555555,0.599925,0.572238888888889
31
+ gpt-4o-0513,0.6813725490196078,0.5989532974661671,0.5122163952167618,0.19201420113771173,0.6235416666666667,0.34458611111111115,0.3441805555555555
32
+ gpt-4o-mini-2024-07-18,0.36519607843137253,0.3418785071827972,0.13575309046266867,0.2707065266105181,0.44214722222222214,0.5004583333333332,0.47896666666666665
33
+ Mistral-Large-Instruct-2407,0.8370098039215687,0.7374229691535793,0.7644582301049158,0.16944638941325085,0.6510750000000001,0.31028611111111104,0.3297916666666667
34
+ Mistral-Nemo-Instruct-2407,0.5759803921568627,0.5262426956484347,0.4414072595011627,0.21142636170606344,0.5161,0.42923055555555545,0.43113055555555546
35
+ Mistral-Small-Instruct-2409,0.766748366013072,0.6890378862258165,0.6416815833333804,0.1894343546381,0.6840472222222221,0.2601583333333335,0.2888777777777778
36
+ dummy,0.1830065359477124,0.2291015386716794,-0.009004148398032956,0.2928877637010999,0.3755222222222222,0.622275,0.5915305555555557
static/models_data/cardinal.svg CHANGED
static/models_data/hermes_3_llama_3.1_8b_instruct/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.5613,0.49920000000000003,0.337775,0.326675
3
+ chunk_1,0.64515,0.6025,0.323025,0.30965000000000004
4
+ chunk_2,0.359575,0.318875,0.555975,0.546925
5
+ chunk_3,0.825825,0.85975,0.0964,0.074575
6
+ chunk_4,0.655175,0.6530750000000001,0.32227500000000003,0.297125
7
+ chunk_chess_0,0.8679250000000001,0.8399749999999999,0.097225,0.091075
8
+ chunk_grammar_1,0.44962500000000005,0.349275,0.341025,0.33925
9
+ chunk_no_conv,0.6267,0.590625,0.3256,0.34885
10
+ chunk_svs_no_conv,0.249375,0.2491,0.77475,0.757875
static/models_data/hermes_3_llama_3.1_8b_instruct/matrix.svg ADDED
static/models_data/hermes_3_llama_3.1_8b_instruct/ranks.svg ADDED
static/models_data/hermes_3_llama_3.1_8b_instruct/structure.svg ADDED
static/models_data/llama-3.1-nemotron-70B-instruct/cfa_metrics.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Context chunk,CFI,TLI,SRMR,RMSEA
2
+ chunk_0,0.887575,0.852575,0.09179999999999999,0.0976
3
+ chunk_1,0.8674999999999999,0.82515,0.10354999999999999,0.123025
4
+ chunk_2,0.42595,0.4061,0.5570999999999999,0.550125
5
+ chunk_3,0.684625,0.6685000000000001,0.313475,0.317
6
+ chunk_4,0.8144750000000001,0.7522,0.12940000000000002,0.174425
7
+ chunk_chess_0,0.7880750000000001,0.711225,0.13935,0.19695000000000001
8
+ chunk_grammar_1,0.844025,0.79365,0.11574999999999999,0.1575
9
+ chunk_no_conv,0.8203250000000001,0.756725,0.12564999999999998,0.178075
10
+ chunk_svs_no_conv,0.672975,0.652775,0.33097499999999996,0.343075
static/models_data/llama-3.1-nemotron-70B-instruct/matrix.svg ADDED
static/models_data/llama-3.1-nemotron-70B-instruct/model_detail.html ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <p>
2
+ This open-source model was created by <a target="_blank" href="https://build.nvidia.com/">Nvidia</a>.
3
+ You can find the release <a target="_blank" href="https://build.nvidia.com/nvidia/llama-3_1-nemotron-70b-instruct">here</a>.
4
+ The model is available on the Hugging Face Hub: <a target="_blank" href="https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF">https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF</a>.
5
+ The 70B model is an RLHF fine-tuned version of Llama-3.1-70B-Instruct and supports contexts of up to 128K tokens.
6
+ </p>
static/models_data/llama-3.1-nemotron-70B-instruct/ranks.svg ADDED
static/models_data/llama-3.1-nemotron-70B-instruct/structure.svg ADDED
static/models_data/ordinal.svg CHANGED