Spaces:

arcee-ai
/

Benchmarks

Running

Benchmarks / results_arcee_agent.py

Julien Simon

Break results into one file per model

0c0f086 3 months ago

2.42 kB

	"""Module containing performance results for the Arcee-Agent model."""

	# Benchmark records for the Arcee-Agent model: top-level metadata plus one
	# entry per instance type under "configurations". Every value is stored as a
	# string, including tokensPerSecond, which is "-" on entries whose status is
	# not "OK".
	results_arcee_agent = {
		"name": "Arcee-Agent",
		"modelType": "Qwen2 7B",
		"notes": "",
		"configurations": [
			# --- g5 instances ---
			{
				"instanceType": "g5.2xlarge",
				"quantization": "none",
				"container": "TGI 2.2.0",
				"status": "OK",
				"tokensPerSecond": "30",
			},
			{
				"instanceType": "g5.12xlarge",
				"quantization": "none",
				"container": "TGI 2.2.0",
				"status": "OK",
				"tokensPerSecond": "83",
			},
			{
				"instanceType": "g5.48xlarge",
				"quantization": "none",
				"container": "TGI 2.2.0",
				"status": "KO",
				"tokensPerSecond": "-",
				# The adjacent literals below concatenate into one string.
				# NOTE(review): "size ares" looks like a typo for "sizes are";
				# left unchanged here because it is runtime data.
				"notes": (
					"ValueError: `num_heads` must be divisible by `num_shards` "
					"(got `num_heads`: 28 and `num_shards`: 8\n\n"
					"SM_NUM_GPUS=7 doesn't work either because tensor size ares "
					"not a multiple of 7 (e.g., 512)"
				),
			},
			# --- g6 instances ---
			{
				"instanceType": "g6.2xlarge",
				"quantization": "none",
				"container": "TGI 2.2.0",
				"status": "OK",
				"tokensPerSecond": "16.3",
			},
			{
				"instanceType": "g6.12xlarge",
				"quantization": "none",
				"container": "TGI 2.2.0",
				"status": "OK",
				"tokensPerSecond": "54.2",
			},
			# --- inf2 instances ---
			# NOTE(review): unlike its siblings, this entry has no
			# "quantization" key; consumers presumably have to tolerate its
			# absence -- confirm against the code that reads this dict.
			{
				"instanceType": "inf2.*",
				"container": "TGI 2.2.0",
				"status": "not supported",
				"tokensPerSecond": "-",
				"notes": "Qwen2: TGI OK, Neuron SDK KO, optimum-neuron KO",
			},
			# --- g6e instances ---
			# Unlike the flat entries above, this one nests a list of
			# per-container results under its own "configurations" key.
			# NOTE(review): verify the reader handles both shapes.
			{
				"instanceType": "g6e.2xlarge",
				"configurations": [
					{
						"container": "TGI 2.2.0",
						"quantization": "none",
						"status": "OK",
						"tokensPerSecond": "45",
					},
					{
						"container": "SGLang 0.2.13",
						"quantization": "none",
						"status": "OK",
						"tokensPerSecond": "48",
					},
					{
						"container": "vLLM 0.5.5",
						"quantization": "none",
						"status": "OK",
						"tokensPerSecond": "45.7",
					},
				],
			},
		],
	}