Julien Simon committed on
Commit
430197f
1 Parent(s): 3a66d4b
results_arcee_nova.py CHANGED
@@ -153,12 +153,5 @@ results_arcee_nova = {
153
  "tokensPerSecond": "58",
154
  "notes": '"MAX_INPUT_LENGTH": "16384", "MAX_TOTAL_TOKENS": "32768",',
155
  },
156
- {
157
- "instanceType": "inf2.*",
158
- "container": "TGI 2.2.0",
159
- "status": "not supported",
160
- "tokensPerSecond": "-",
161
- "notes": "Qwen2: TGI OK, Neuron SDK KO, optimum-neuron KO",
162
- },
163
  ],
164
  }
 
153
  "tokensPerSecond": "58",
154
  "notes": '"MAX_INPUT_LENGTH": "16384", "MAX_TOTAL_TOKENS": "32768",',
155
  },
 
 
 
 
 
 
 
156
  ],
157
  }
results_arcee_supernova.py CHANGED
@@ -33,17 +33,17 @@ results_arcee_supernova = {
33
  "configurations": [
34
  {
35
  "quantization": "none",
36
- "container": "transformers-neuronx",
37
  "status": "KO",
38
  "tokensPerSecond": "-",
39
- "notes": "OOM bs=2,seqlen=4096",
40
  },
41
  {
42
  "quantization": "none",
43
- "container": "transformers-neuronx",
44
  "status": "KO",
45
  "tokensPerSecond": "-",
46
- "notes": "OOM bs=2,seqlen=2048",
47
  },
48
  ],
49
  },
@@ -52,24 +52,24 @@ results_arcee_supernova = {
52
  "configurations": [
53
  {
54
  "quantization": "none",
55
- "container": "transformers-neuronx",
56
  "status": "OK",
57
  "tokensPerSecond": "28",
58
- "notes": "bs=4,seqlen=4096",
59
  },
60
  {
61
  "quantization": "none",
62
- "container": "transformers-neuronx",
63
  "status": "OK",
64
  "tokensPerSecond": "24",
65
- "notes": "bs=2,seqlen=8192",
66
  },
67
  {
68
  "quantization": "none",
69
- "container": "transformers-neuronx",
70
- "status": "?",
71
- "tokensPerSecond": "KO",
72
- "notes": "OOM bs=2,seqlen=16384",
73
  },
74
  ],
75
  },
@@ -85,8 +85,8 @@ results_arcee_supernova = {
85
  "instanceType": "p5.48xlarge",
86
  "quantization": "none",
87
  "container": "TGI 2.2.0",
88
- "status": "?",
89
- "tokensPerSecond": "?",
90
  "notes": "",
91
  },
92
  ],
 
33
  "configurations": [
34
  {
35
  "quantization": "none",
36
+ "container": "LMI 0.29+transformers-neuronx 0.11.351",
37
  "status": "KO",
38
  "tokensPerSecond": "-",
39
+ "notes": "OOM bs=2,seqlen=4096 - SDK 2.19.1",
40
  },
41
  {
42
  "quantization": "none",
43
+ "container": "LMI 0.29+transformers-neuronx 0.11.351",
44
  "status": "KO",
45
  "tokensPerSecond": "-",
46
+ "notes": "OOM bs=2,seqlen=2048 - SDK 2.19.1",
47
  },
48
  ],
49
  },
 
52
  "configurations": [
53
  {
54
  "quantization": "none",
55
+ "container": "LMI 0.29+transformers-neuronx 0.11.351",
56
  "status": "OK",
57
  "tokensPerSecond": "28",
58
+ "notes": "bs=4,seqlen=4096 - SDK 2.19.1",
59
  },
60
  {
61
  "quantization": "none",
62
+ "container": "LMI 0.29+transformers-neuronx 0.11.351",
63
  "status": "OK",
64
  "tokensPerSecond": "24",
65
+ "notes": "bs=2,seqlen=8192 - SDK 2.19.1",
66
  },
67
  {
68
  "quantization": "none",
69
+ "container": "LMI 0.29+transformers-neuronx 0.11.351",
70
+ "status": "KO",
71
+ "tokensPerSecond": "-",
72
+ "notes": "OOM bs=2,seqlen=16384 - SDK 2.19.1",
73
  },
74
  ],
75
  },
 
85
  "instanceType": "p5.48xlarge",
86
  "quantization": "none",
87
  "container": "TGI 2.2.0",
88
+ "status": "OK",
89
+ "tokensPerSecond": "58",
90
  "notes": "",
91
  },
92
  ],
results_llama_spark.py CHANGED
@@ -83,7 +83,7 @@ results_llama_spark = {
83
  "tokensPerSecond": "123",
84
  },
85
  {
86
- "container": "vLLM 0.5.5",
87
  "quantization": "none",
88
  "status": "OK",
89
  "tokensPerSecond": "106",
@@ -98,20 +98,13 @@ results_llama_spark = {
98
  "tokensPerSecond": "145",
99
  "notes": '"MAX_INPUT_TOKENS": "40960", "MAX_TOTAL_TOKENS": "81920"\n\n64K/128K fails (even with 4-bit)',
100
  },
101
- {
102
- "instanceType": "inf2.*",
103
- "container": "TGI 2.2.0",
104
- "status": "not supported",
105
- "tokensPerSecond": "-",
106
- "notes": "Llama-3.1: TGI OK, Neuron SDK OK, optimum-neuron KO",
107
- },
108
  {
109
  "instanceType": "inf2.2xlarge",
110
- "container": "transformers-neuronx 0.11.351",
111
  "quantization": "none",
112
  "status": "OK",
113
  "tokensPerSecond": "24",
114
- "notes": "Neuron SDK 2.19.1",
115
  },
116
  ],
117
  }
 
83
  "tokensPerSecond": "123",
84
  },
85
  {
86
+ "container": "LMI 0.29+vLLM 0.5.5",
87
  "quantization": "none",
88
  "status": "OK",
89
  "tokensPerSecond": "106",
 
98
  "tokensPerSecond": "145",
99
  "notes": '"MAX_INPUT_TOKENS": "40960", "MAX_TOTAL_TOKENS": "81920"\n\n64K/128K fails (even with 4-bit)',
100
  },
 
 
 
 
 
 
 
101
  {
102
  "instanceType": "inf2.2xlarge",
103
+ "container": "LMI 0.29+transformers-neuronx 0.11.351",
104
  "quantization": "none",
105
  "status": "OK",
106
  "tokensPerSecond": "24",
107
+ "notes": "SDK 2.19.1",
108
  },
109
  ],
110
  }