# Spaces: Running
"""Module containing performance results for the Arcee-SuperNova model.""" | |
results_arcee_supernova = { | |
"name": "Arcee-SuperNova", | |
"modelType": "Llama 3.1 70B", | |
"configurations": [ | |
{ | |
"instanceType": "g5.12xlarge", | |
"quantization": "awq", | |
"container": "TGI 2.2.0", | |
"status": "OK", | |
"tokensPerSecond": "33", | |
"notes": "MAX_INPUT_TOKENS: 8192, MAX_TOTAL_TOKENS: 16384", | |
}, | |
{ | |
"instanceType": "p4d.24xlarge", | |
"quantization": "awq", | |
"container": "TGI 2.2.0", | |
"status": "OK", | |
"tokensPerSecond": "58", | |
"notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768", | |
}, | |
{ | |
"instanceType": "p5.48xlarge", | |
"quantization": "awq", | |
"container": "TGI 2.2.0", | |
"status": "OK", | |
"tokensPerSecond": "73", | |
"notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768", | |
}, | |
{ | |
"instanceType": "inf2.24xlarge", | |
"configurations": [ | |
{ | |
"quantization": "none", | |
"container": "LMI 0.29+transformers-neuronx 0.11.351", | |
"status": "KO", | |
"tokensPerSecond": "-", | |
"notes": "OOM bs=2,seqlen=4096 - SDK 2.19.1", | |
}, | |
{ | |
"quantization": "none", | |
"container": "LMI 0.29+transformers-neuronx 0.11.351", | |
"status": "KO", | |
"tokensPerSecond": "-", | |
"notes": "OOM bs=2,seqlen=2048 - SDK 2.19.1", | |
}, | |
], | |
}, | |
{ | |
"instanceType": "inf2.48xlarge", | |
"configurations": [ | |
{ | |
"quantization": "none", | |
"container": "LMI 0.29+transformers-neuronx 0.11.351", | |
"status": "OK", | |
"tokensPerSecond": "28", | |
"notes": "bs=4,seqlen=4096 - SDK 2.19.1", | |
}, | |
{ | |
"quantization": "none", | |
"container": "LMI 0.29+transformers-neuronx 0.11.351", | |
"status": "OK", | |
"tokensPerSecond": "24", | |
"notes": "bs=2,seqlen=8192 - SDK 2.19.1", | |
}, | |
{ | |
"quantization": "none", | |
"container": "LMI 0.29+transformers-neuronx 0.11.351", | |
"status": "KO", | |
"tokensPerSecond": "-", | |
"notes": "OOM bs=2,seqlen=16384 - SDK 2.19.1", | |
}, | |
], | |
}, | |
{ | |
"instanceType": "p4d.24xlarge", | |
"quantization": "none", | |
"container": "TGI 2.2.0", | |
"status": "OK", | |
"tokensPerSecond": "30", | |
"notes": "", | |
}, | |
{ | |
"instanceType": "p5.48xlarge", | |
"quantization": "none", | |
"container": "TGI 2.2.0", | |
"status": "OK", | |
"tokensPerSecond": "58", | |
"notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768", | |
}, | |
], | |
} | |