"""Module containing performance results for the Llama-3-Supernova-Lite model.""" | |
results_llama_supernova_lite = {
    "name": "Llama-3.1-SuperNova-Lite",
    "modelType": "Llama 3.1 8B",
    "configurations": [
        {
            "instanceType": "c7i.4xlarge",
            "configurations": [
                {
                    "quantization": "Q6_K",
                    "container": "llama.cpp 10/18/24",
                    "status": "OK",
                    "tokensPerSecond": "xxx",
                    "notes": "AMX enabled, Flash Attention enabled",
                },
                {
                    "quantization": "Q5_K",
                    "container": "llama.cpp 10/18/24",
                    "status": "OK",
                    "tokensPerSecond": "xxx",
                    "notes": "AMX enabled, Flash Attention enabled",
                },
                {
                    "quantization": "Q4_K",
                    "container": "llama.cpp 10/18/24",
                    "status": "OK",
                    "tokensPerSecond": "xxx",
                    "notes": "AMX enabled, Flash Attention enabled",
                },
                {
                    "quantization": "IQ4_XS",
                    "container": "llama.cpp 10/18/24",
                    "status": "OK",
                    "tokensPerSecond": "xxx",
                    "notes": "AMX enabled, Flash Attention enabled",
                },
            ],
        },
        {
            "instanceType": "c7g.8xlarge",
            "quantization": "Q4_0_8_8",
            "container": "llama.cpp 9/18/24",
            "status": "OK",
            "tokensPerSecond": "39.7",
            "notes": "requantized from Q4_K_S",
        },
        {
            "instanceType": "c7g.16xlarge",
            "quantization": "Q4_0_8_8",
            "container": "llama.cpp 9/18/24",
            "status": "OK",
            "tokensPerSecond": "45.5",
            "notes": "",
        },
        {
            "instanceType": "c8g.4xlarge",
            "quantization": "Q4_0_4_8",
            "container": "llama.cpp 11/05/24",
            "status": "OK",
            "tokensPerSecond": "34",
            "notes": "with Flash Attention",
        },
        {
            "instanceType": "r8g.4xlarge",
            "quantization": "Q4_0_4_8",
            "container": "llama.cpp 9/11/24",
            "status": "OK",
            "tokensPerSecond": "40",
            "notes": "with Flash Attention",
        },
        {
            "instanceType": "r8g.8xlarge",
            "quantization": "Q4_0_4_8",
            "container": "llama.cpp 9/11/24",
            "status": "OK",
            "tokensPerSecond": "63",
            "notes": "with Flash Attention",
        },
        {
            "instanceType": "r8g.16xlarge",
            "quantization": "Q4_0_4_8",
            "container": "llama.cpp 9/11/24",
            "status": "OK",
            "tokensPerSecond": "70",
            "notes": "with Flash Attention",
        },
    ],
}
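

# --- Usage sketch (illustrative only, not part of the original data) ---------
# A minimal example of how this results dictionary might be consumed. The
# helper name `iter_configurations` and the print format are assumptions; note
# that the c7i.4xlarge entry groups several quantizations under a nested
# "configurations" list, so both the grouped and flat shapes are handled.
def iter_configurations(results):
    """Yield flat (instance_type, config) pairs from a results dictionary."""
    for entry in results["configurations"]:
        nested = entry.get("configurations")
        if nested:
            # Grouped form: one instance type, several quantization runs.
            for config in nested:
                yield entry["instanceType"], config
        else:
            # Flat form: the entry itself carries the benchmark fields.
            yield entry["instanceType"], entry


if __name__ == "__main__":
    for instance_type, config in iter_configurations(results_llama_supernova_lite):
        print(
            f"{instance_type:>14}  {config['quantization']:>9}  "
            f"{config['tokensPerSecond']:>6} tok/s  ({config['container']})"
        )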