Julien Simon committed on
Commit
bf877db
1 Parent(s): ce951db
Files changed (1) hide show
  1. results_llama_supernova_lite.py +8 -8
results_llama_supernova_lite.py CHANGED
@@ -21,28 +21,28 @@ results_llama_supernova_lite = {
21
  "notes": "",
22
  },
23
  {
24
- "instanceType": "r8g.8xlarge",
25
- "quantization": "Q8",
26
  "container": "llama.cpp 9/11/24",
27
  "status": "OK",
28
- "tokensPerSecond": "37",
29
- "notes": "",
30
  },
31
  {
32
  "instanceType": "r8g.8xlarge",
33
  "quantization": "Q4_0_4_8",
34
  "container": "llama.cpp 9/11/24",
35
  "status": "OK",
36
- "tokensPerSecond": "57",
37
- "notes": "",
38
  },
39
  {
40
  "instanceType": "r8g.16xlarge",
41
  "quantization": "Q4_0_4_8",
42
  "container": "llama.cpp 9/11/24",
43
  "status": "OK",
44
- "tokensPerSecond": "65",
45
- "notes": "",
46
  },
47
  ],
48
  }
 
21
  "notes": "",
22
  },
23
  {
24
+ "instanceType": "r8g.4xlarge",
25
+ "quantization": "Q4_0_4_8",
26
  "container": "llama.cpp 9/11/24",
27
  "status": "OK",
28
+ "tokensPerSecond": "49",
29
+ "notes": "with Flash Attention",
30
  },
31
  {
32
  "instanceType": "r8g.8xlarge",
33
  "quantization": "Q4_0_4_8",
34
  "container": "llama.cpp 9/11/24",
35
  "status": "OK",
36
+ "tokensPerSecond": "63",
37
+ "notes": "with Flash Attention",
38
  },
39
  {
40
  "instanceType": "r8g.16xlarge",
41
  "quantization": "Q4_0_4_8",
42
  "container": "llama.cpp 9/11/24",
43
  "status": "OK",
44
+ "tokensPerSecond": "70",
45
+ "notes": "with Flash Attention",
46
  },
47
  ],
48
  }