"""Module containing performance results for the Arcee-Lite model."""
# Benchmark results for the Arcee-Lite model, grouped by instance type.
#
# Each item of the top-level "configurations" list takes one of two shapes:
#   * flat   - a single run: "instanceType" together with "quantization",
#              "container", "status", "tokensPerSecond" and optional "notes";
#   * nested - "instanceType" plus its own "configurations" list holding one
#              dict per run on that instance.
# Consumers must handle both shapes.
#
# "tokensPerSecond" is always stored as a string; entries whose status is not
# "OK" carry "-" instead of a measurement.
results_arcee_lite = {
    "name": "Arcee-Lite",
    "modelType": "Qwen2 1.5B distilled from phi-3-medium 14B",
    "configurations": [
        {
            "instanceType": "c6i.xlarge",
            "quantization": "bitsandbytes-nf4",
            "container": "TGI 2.2.0",
            "status": "KO",
            "tokensPerSecond": "-",
            "notes": "OOM, might work with a prequantized model",
        },
        {
            "instanceType": "c6i.2xlarge",
            "quantization": "bitsandbytes-nf4",
            "container": "TGI 2.2.0",
            "status": "KO",
            "tokensPerSecond": "-",
            "notes": "OOM, might work with a prequantized model",
        },
        {
            "instanceType": "c6i.4xlarge",
            "configurations": [
                {
                    "quantization": "none",
                    "container": "TGI 2.2.0",
                    "status": "OK",
                    "tokensPerSecond": "10.7",
                },
                {
                    "quantization": "bitsandbytes (int8)",
                    "container": "TGI 2.2.0",
                    "status": "OK",
                    "tokensPerSecond": "10.5",
                },
                {
                    "quantization": "bitsandbytes-nf4",
                    "container": "TGI 2.2.0",
                    "status": "OK",
                    "tokensPerSecond": "10.6",
                },
            ],
        },
        {
            "instanceType": "c7i.4xlarge",
            "quantization": "none",
            "container": "TGI 2.2.0",
            "status": "waiting for quota",
            "tokensPerSecond": "-",
        },
        {
            "instanceType": "g5.xlarge",
            "configurations": [
                {
                    "quantization": "none",
                    "container": "TGI 2.2.0",
                    "status": "OK",
                    "tokensPerSecond": "110",
                },
                {
                    "quantization": "none",
                    "container": "DJL 0.28 vLLM",
                    "status": "OK",
                    "tokensPerSecond": "105",
                    # Verbatim container setting; the embedded quotes and the
                    # trailing comma are part of the stored note text.
                    "notes": '"OPTION_MAX_MODEL_LEN": "32768",',
                },
            ],
        },
        {
            "instanceType": "g6e.2xlarge",
            "configurations": [
                {
                    "container": "TGI 2.2.0",
                    "quantization": "none",
                    "status": "OK",
                    "tokensPerSecond": "160",
                },
                {
                    "container": "SGLang 0.2.13",
                    "quantization": "none",
                    "status": "OK",
                    "tokensPerSecond": "167",
                },
                {
                    "container": "vLLM 0.5.5",
                    "quantization": "none",
                    "status": "OK",
                    "tokensPerSecond": "150",
                },
            ],
        },
    ],
}