Julien Simon committed on
Commit
6b33c1f
1 Parent(s): d17a4c1

SuperNova g6e.2xlarge

Browse files
Files changed (1) hide show
  1. results_arcee_supernova.py +8 -0
results_arcee_supernova.py CHANGED
@@ -28,6 +28,14 @@ results_arcee_supernova = {
28
  "tokensPerSecond": "33",
29
  "notes": "MAX_INPUT_TOKENS: 8192, MAX_TOTAL_TOKENS: 16384",
30
  },
 
 
 
 
 
 
 
 
31
  {
32
  "instanceType": "p4d.24xlarge",
33
  "quantization": "awq",
 
28
  "tokensPerSecond": "33",
29
  "notes": "MAX_INPUT_TOKENS: 8192, MAX_TOTAL_TOKENS: 16384",
30
  },
31
+ {
32
+ "instanceType": "g6e.2xlarge",
33
+ "quantization": "awq (w4 g128)",
34
+ "container": "vLLM 0.6.2",
35
+ "status": "OK",
36
+ "tokensPerSecond": "18",
37
+ "notes": "--max-model-len 10000 --max-num-seqs 16 --enforce-eager",
38
+ },
39
  {
40
  "instanceType": "p4d.24xlarge",
41
  "quantization": "awq",