"""Module containing performance results for the Arcee-SuperNova model.""" results_arcee_supernova = { "name": "Arcee-SuperNova", "modelType": "Llama 3.1 70B", "configurations": [ { "instanceType": "g5.12xlarge", "quantization": "awq", "container": "TGI 2.2.0", "status": "OK", "tokensPerSecond": "33", "notes": "MAX_INPUT_TOKENS: 8192, MAX_TOTAL_TOKENS: 16384", }, { "instanceType": "p4d.24xlarge", "quantization": "awq", "container": "TGI 2.2.0", "status": "OK", "tokensPerSecond": "58", "notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768", }, { "instanceType": "p5.48xlarge", "quantization": "awq", "container": "TGI 2.2.0", "status": "OK", "tokensPerSecond": "73", "notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768", }, { "instanceType": "inf2.24xlarge", "configurations": [ { "quantization": "none", "container": "LMI 0.29+transformers-neuronx 0.11.351", "status": "KO", "tokensPerSecond": "-", "notes": "OOM bs=2,seqlen=4096 - SDK 2.19.1", }, { "quantization": "none", "container": "LMI 0.29+transformers-neuronx 0.11.351", "status": "KO", "tokensPerSecond": "-", "notes": "OOM bs=2,seqlen=2048 - SDK 2.19.1", }, ], }, { "instanceType": "inf2.48xlarge", "configurations": [ { "quantization": "none", "container": "LMI 0.29+transformers-neuronx 0.11.351", "status": "OK", "tokensPerSecond": "28", "notes": "bs=4,seqlen=4096 - SDK 2.19.1", }, { "quantization": "none", "container": "LMI 0.29+transformers-neuronx 0.11.351", "status": "OK", "tokensPerSecond": "24", "notes": "bs=2,seqlen=8192 - SDK 2.19.1", }, { "quantization": "none", "container": "LMI 0.29+transformers-neuronx 0.11.351", "status": "KO", "tokensPerSecond": "-", "notes": "OOM bs=2,seqlen=16384 - SDK 2.19.1", }, ], }, { "instanceType": "p4d.24xlarge", "quantization": "none", "container": "TGI 2.2.0", "status": "OK", "tokensPerSecond": "30", "notes": "", }, { "instanceType": "p5.48xlarge", "quantization": "none", "container": "TGI 2.2.0", "status": "OK", "tokensPerSecond": "58", "notes": "MAX_INPUT_TOKENS: 16384, MAX_TOTAL_TOKENS: 32768", }, ], }