"""Module containing performance results for the Arcee-Scribe model.""" results_arcee_scribe = { "name": "Arcee-Scribe", "modelType": "InternLM2.5 8B", "configurations": [ { "instanceType": "g5.2xlarge", "quantization": "none", "container": "DJL 0.28 vLLM", "status": "OK", "tokensPerSecond": 29, "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",', }, { "instanceType": "g5.12xlarge", "quantization": "none", "container": "DJL 0.28 vLLM", "status": "OK", "tokensPerSecond": 65, "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",\nNot supported by AutoAWQ and AutoGPTQ', }, { "instanceType": "g5.48xlarge", "quantization": "none", "container": "DJL 0.28 vLLM", "status": "OK", "tokensPerSecond": 80, "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",', }, { "instanceType": "g6.2xlarge", "quantization": "none", "container": "DJL 0.28 vLLM", "status": "OK", "tokensPerSecond": 16, "notes": '"OPTION_MAX_MODEL_LEN": "4096"', }, { "instanceType": "g6.12xlarge", "quantization": "none", "container": "DJL 0.28 vLLM", "status": "OK", "tokensPerSecond": 50, "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",', }, { "instanceType": "g6.48xlarge", "quantization": "none", "container": "DJL 0.28 vLLM", "status": "OK", "tokensPerSecond": 69, "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",', }, { "instanceType": "g6e.2xlarge", "quantization": "none", "container": "SGLang 0.2.13", "status": "OK", "tokensPerSecond": 46, }, { "instanceType": "p4d.24xlarge", "quantization": "none", "container": "DJL 0.28 vLLM", "status": "OK", "tokensPerSecond": 82, "notes": '"OPTION_MAX_MODEL_LEN": "32768",\n"TENSOR_PARALLEL_DEGREE": "max",', }, ], }