kreas commited on
Commit
b6acf62
1 Parent(s): 6073480

Upload /Llama-2-7b-hf/nf4_batch_size_1_sq_len_256_new_tokens_256/benchmark_report.json with huggingface_hub

Browse files
Llama-2-7b-hf/nf4_batch_size_1_sq_len_256_new_tokens_256/benchmark_report.json CHANGED
@@ -2,257 +2,222 @@
2
  "prefill": {
3
  "memory": {
4
  "unit": "MB",
5
- "max_ram": 1015.881728,
6
  "max_vram": 4905.238528,
7
  "max_reserved": 4420.796416,
8
  "max_allocated": 4252.667392
9
  },
10
  "latency": {
11
  "unit": "s",
12
- "mean": 0.042467162730330105,
13
- "stdev": 0.0006609647717465549,
14
  "values": [
15
- 0.048517601013183594,
16
- 0.04331315231323242,
17
- 0.043079681396484375,
18
- 0.04321279907226563,
19
- 0.04323123168945313,
20
- 0.043218944549560545,
21
- 0.043480064392089846,
22
- 0.04548710250854492,
23
- 0.04911718368530273,
24
- 0.0445296630859375,
25
- 0.042159103393554685,
26
- 0.04237823867797851,
27
- 0.04234444808959961,
28
- 0.0423741455078125,
29
- 0.04237619018554688,
30
- 0.04238336181640625,
31
- 0.04228633499145508,
32
- 0.042385406494140625,
33
- 0.04228409576416016,
34
- 0.04238336181640625,
35
- 0.04239462280273437,
36
- 0.04246220779418945,
37
- 0.042264575958251956,
38
- 0.042359809875488284,
39
- 0.04222873687744141,
40
- 0.042570751190185545,
41
- 0.04218265533447266,
42
- 0.042363903045654294,
43
- 0.04226764678955078,
44
- 0.04237209701538086,
45
- 0.04212940979003906,
46
- 0.042363903045654294,
47
- 0.04219612884521484,
48
- 0.04238156890869141,
49
- 0.04231782531738281,
50
- 0.04241408157348633,
51
- 0.042324031829833984,
52
- 0.042387454986572266,
53
- 0.04229529571533203,
54
- 0.04235776138305664,
55
- 0.04212838363647461,
56
- 0.04236800003051758,
57
- 0.04221440124511719,
58
- 0.04236800003051758,
59
- 0.04280115127563477,
60
- 0.04237619018554688,
61
- 0.04229430389404297,
62
- 0.0424007682800293,
63
- 0.04217139053344727,
64
- 0.04236185455322266,
65
- 0.04226867294311523,
66
- 0.042393600463867184,
67
- 0.042243072509765625,
68
- 0.04239257431030274,
69
- 0.04221952056884765,
70
- 0.042352638244628905,
71
- 0.042827934265136716,
72
- 0.04238643264770508,
73
- 0.042369022369384765,
74
- 0.0424161262512207,
75
- 0.04222873687744141,
76
- 0.042382335662841795,
77
- 0.042278911590576174,
78
- 0.04239052963256836,
79
- 0.042237953186035154,
80
- 0.04238147354125977,
81
- 0.042308609008789064,
82
- 0.04239052963256836,
83
- 0.04216934585571289,
84
- 0.042385406494140625,
85
- 0.04228710556030273,
86
- 0.0423741455078125,
87
- 0.04220927810668945,
88
- 0.04236185455322266,
89
- 0.04229119873046875,
90
- 0.04249103927612305,
91
- 0.042395648956298826,
92
- 0.04238131332397461,
93
- 0.04235673522949219,
94
- 0.04242124938964844,
95
- 0.04230144119262695,
96
- 0.04241116714477539,
97
- 0.042425342559814457,
98
- 0.04239052963256836,
99
- 0.042248191833496096,
100
- 0.04243865585327149,
101
- 0.04233126449584961,
102
- 0.04239769744873047,
103
- 0.04232191848754883,
104
- 0.042420223236083986,
105
- 0.042331134796142575,
106
- 0.042379264831542966,
107
- 0.0426506233215332,
108
- 0.04239462280273437,
109
- 0.04236697769165039,
110
- 0.042393600463867184,
111
- 0.042396671295166014,
112
- 0.04249292755126953,
113
- 0.04233011245727539,
114
- 0.042396671295166014,
115
- 0.04233011245727539,
116
- 0.042482688903808595,
117
- 0.042241024017333983,
118
- 0.04264243316650391,
119
- 0.04236288070678711,
120
- 0.042363903045654294,
121
- 0.04232191848754883,
122
- 0.04237311935424805,
123
- 0.042431488037109374,
124
- 0.04239052963256836,
125
- 0.0423741455078125,
126
- 0.042412033081054686,
127
- 0.04233523178100586,
128
- 0.04239462280273437,
129
- 0.042395648956298826,
130
- 0.04238950347900391,
131
- 0.04253696060180664,
132
- 0.04238131332397461,
133
- 0.042426368713378904,
134
- 0.04243865585327149,
135
- 0.042415103912353515,
136
- 0.042521598815917966,
137
- 0.042327041625976565,
138
- 0.04240588760375977,
139
- 0.042412033081054686,
140
- 0.04235059356689453,
141
- 0.042264575958251956,
142
- 0.042379264831542966,
143
- 0.04227174377441406,
144
- 0.042433761596679685,
145
- 0.04225331115722656,
146
- 0.04241222381591797,
147
- 0.04238131332397461,
148
- 0.04242943954467773,
149
- 0.04240691375732422,
150
- 0.042363903045654294,
151
- 0.04223004913330078,
152
- 0.042385406494140625,
153
- 0.04230758285522461,
154
- 0.04237004852294922,
155
- 0.042744831085205076,
156
- 0.04236697769165039,
157
- 0.04225228881835937,
158
- 0.04240793609619141,
159
- 0.04233830261230469,
160
- 0.04238336181640625,
161
- 0.042308609008789064,
162
- 0.042395648956298826,
163
- 0.04227993774414063,
164
- 0.04239567947387695,
165
- 0.04232499313354492,
166
- 0.042812416076660156,
167
- 0.0422083511352539,
168
- 0.04241100692749023,
169
- 0.042234878540039066,
170
- 0.042392894744873046,
171
- 0.04212940979003906,
172
- 0.04237823867797851,
173
- 0.04230451202392578,
174
- 0.042379264831542966,
175
- 0.04227174377441406,
176
- 0.04237619018554688,
177
- 0.04218982315063476,
178
- 0.04238131332397461,
179
- 0.04234854507446289,
180
- 0.04237516784667969,
181
- 0.04220927810668945,
182
- 0.042409984588623044,
183
- 0.042349567413330076,
184
- 0.04237004852294922,
185
- 0.04219903945922852,
186
- 0.04237516784667969,
187
- 0.04221440124511719,
188
- 0.042363903045654294,
189
- 0.04225228881835937,
190
- 0.042369022369384765,
191
- 0.04212428665161133,
192
- 0.04236800003051758,
193
- 0.0422737922668457,
194
- 0.04237107086181641,
195
- 0.04219903945922852,
196
- 0.0424161262512207,
197
- 0.04229529571533203,
198
- 0.04235366439819336,
199
- 0.042248191833496096,
200
- 0.04236800003051758,
201
- 0.04218572616577149,
202
- 0.04277248001098633,
203
- 0.042382335662841795,
204
- 0.042355712890625,
205
- 0.042428417205810545,
206
- 0.04238643264770508,
207
- 0.042352638244628905,
208
- 0.04240281677246094,
209
- 0.04240281677246094,
210
- 0.042387454986572266,
211
- 0.042262527465820314,
212
- 0.04239974212646484,
213
- 0.04257484817504883,
214
- 0.042591552734375,
215
- 0.04235673522949219,
216
- 0.042393600463867184,
217
- 0.04225024032592774,
218
- 0.04242345428466797,
219
- 0.04231782531738281,
220
- 0.04240691375732422,
221
- 0.04241222381591797,
222
- 0.04261171340942383,
223
- 0.042246337890625,
224
- 0.04240793609619141,
225
- 0.04248883056640625,
226
- 0.042380287170410154,
227
- 0.04228505706787109,
228
- 0.04241100692749023,
229
- 0.042417152404785156,
230
- 0.04240691375732422,
231
- 0.04239257431030274,
232
- 0.042386497497558594,
233
- 0.04246441650390625,
234
- 0.04236185455322266,
235
- 0.042369022369384765,
236
- 0.042417152404785156,
237
- 0.042482688903808595,
238
- 0.042385406494140625,
239
- 0.042297344207763675,
240
- 0.04238848114013672,
241
- 0.04245811080932617,
242
- 0.04240281677246094,
243
- 0.04224227142333985,
244
- 0.04238336181640625,
245
- 0.042420223236083986,
246
- 0.042389823913574216,
247
- 0.04247465515136719,
248
- 0.042393600463867184,
249
- 0.042292224884033204,
250
- 0.0428042221069336
251
  ]
252
  },
253
  "throughput": {
254
  "unit": "tokens/s",
255
- "value": 6028.187040081311
256
  },
257
  "energy": null,
258
  "efficiency": null
@@ -260,23 +225,22 @@
260
  "decode": {
261
  "memory": {
262
  "unit": "MB",
263
- "max_ram": 1015.881728,
264
  "max_vram": 5255.462912,
265
  "max_reserved": 4771.0208,
266
  "max_allocated": 4715.61216
267
  },
268
  "latency": {
269
  "unit": "s",
270
- "mean": 8.599395771026611,
271
- "stdev": 0.03203231048584154,
272
  "values": [
273
- 8.56736346054077,
274
- 8.631428081512453
275
  ]
276
  },
277
  "throughput": {
278
  "unit": "tokens/s",
279
- "value": 29.653246203547816
280
  },
281
  "energy": null,
282
  "efficiency": null
@@ -285,524 +249,269 @@
285
  "memory": null,
286
  "latency": {
287
  "unit": "s",
288
- "mean": 0.033723120670692594,
289
- "stdev": 0.0007542579259829491,
290
  "values": [
291
- 0.03367903900146484,
292
- 0.034095104217529294,
293
- 0.03362924957275391,
294
- 0.033646526336669924,
295
- 0.033672191619873046,
296
- 0.03379916763305664,
297
- 0.03361587142944336,
298
- 0.03376639938354492,
299
- 0.03357798385620117,
300
- 0.03353190231323242,
301
- 0.033588222503662106,
302
- 0.033863937377929684,
303
- 0.03352755355834961,
304
- 0.033552383422851564,
305
- 0.03365494537353515,
306
- 0.033521503448486326,
307
- 0.03344486236572266,
308
- 0.03351980972290039,
309
- 0.0335296630859375,
310
- 0.03351244735717773,
311
- 0.033667072296142575,
312
- 0.03366400146484375,
313
- 0.03353702545166016,
314
- 0.03360665512084961,
315
- 0.033732608795166014,
316
- 0.033527809143066405,
317
- 0.033498111724853515,
318
- 0.033559776306152346,
319
- 0.033648414611816405,
320
- 0.03360153579711914,
321
- 0.03352678298950195,
322
- 0.03374694442749023,
323
- 0.0335781135559082,
324
- 0.03363315200805664,
325
- 0.033604606628417966,
326
- 0.03352371215820313,
327
- 0.033670143127441404,
328
- 0.03367424011230469,
329
- 0.03356979370117188,
330
- 0.03373056030273437,
331
- 0.033708030700683594,
332
- 0.03353395080566406,
333
- 0.033614849090576174,
334
- 0.03355852890014648,
335
- 0.03376332855224609,
336
- 0.03358617782592774,
337
- 0.033514495849609374,
338
- 0.033640735626220705,
339
- 0.033441600799560545,
340
- 0.03353900909423828,
341
- 0.03348188781738281,
342
- 0.03361465454101562,
343
- 0.03352268981933594,
344
- 0.033567745208740236,
345
- 0.03363759994506836,
346
- 0.033532703399658206,
347
- 0.0335175666809082,
348
- 0.0339835205078125,
349
- 0.03363119888305664,
350
- 0.0335728645324707,
351
- 0.033532928466796875,
352
- 0.03351347351074219,
353
- 0.03356979370117188,
354
- 0.03359231948852539,
355
- 0.03363225555419922,
356
- 0.03357807922363281,
357
- 0.03343247985839844,
358
- 0.03353497695922852,
359
- 0.03351347351074219,
360
- 0.0334202880859375,
361
- 0.03354009628295898,
362
- 0.033796127319335935,
363
- 0.03371942520141601,
364
- 0.03353379058837891,
365
- 0.03362918472290039,
366
- 0.03350140762329101,
367
- 0.03364742279052734,
368
- 0.03346121597290039,
369
- 0.0335022087097168,
370
- 0.0335175666809082,
371
- 0.033584129333496096,
372
- 0.033710079193115236,
373
- 0.03355574417114258,
374
- 0.03373641586303711,
375
- 0.03346944046020508,
376
- 0.033544193267822264,
377
- 0.03350352096557617,
378
- 0.03355516815185547,
379
- 0.033481727600097655,
380
- 0.03347359848022461,
381
- 0.0335082893371582,
382
- 0.033446910858154294,
383
- 0.03348992156982422,
384
- 0.03352883148193359,
385
- 0.033433727264404293,
386
- 0.03351744079589844,
387
- 0.0336445426940918,
388
- 0.03349708938598633,
389
- 0.03350527954101563,
390
- 0.033516544342041016,
391
- 0.033527935028076175,
392
- 0.03341606521606445,
393
- 0.03358003234863281,
394
- 0.03350527954101563,
395
- 0.03360563278198242,
396
- 0.0335093765258789,
397
- 0.03366604614257813,
398
- 0.03351574325561523,
399
- 0.03360847854614258,
400
- 0.03349913787841797,
401
- 0.033751041412353515,
402
- 0.03354214477539062,
403
- 0.03370086288452148,
404
- 0.03360153579711914,
405
- 0.033567745208740236,
406
- 0.03361280059814453,
407
- 0.03362508773803711,
408
- 0.03391795349121094,
409
- 0.03364863967895508,
410
- 0.033498111724853515,
411
- 0.033689697265625,
412
- 0.03361782455444336,
413
- 0.03367424011230469,
414
- 0.03368243026733399,
415
- 0.033576961517333984,
416
- 0.033584129333496096,
417
- 0.0337520637512207,
418
- 0.03362303924560547,
419
- 0.03363532638549805,
420
- 0.03349094390869141,
421
- 0.03344412612915039,
422
- 0.03370774459838867,
423
- 0.03375833511352539,
424
- 0.03355532836914062,
425
- 0.03360358428955078,
426
- 0.0337336311340332,
427
- 0.03346124649047852,
428
- 0.033522720336914065,
429
- 0.033508319854736325,
430
- 0.03347353744506836,
431
- 0.033693695068359376,
432
- 0.033535999298095705,
433
- 0.03356467056274414,
434
- 0.03382271957397461,
435
- 0.03361382293701172,
436
- 0.033459201812744144,
437
- 0.03358310317993164,
438
- 0.03362303924560547,
439
- 0.0335032958984375,
440
- 0.03348678588867188,
441
- 0.03362937545776367,
442
- 0.033528640747070314,
443
- 0.03379916763305664,
444
- 0.033650688171386715,
445
- 0.033576961517333984,
446
- 0.03346739196777344,
447
- 0.033620990753173825,
448
- 0.033701889038085936,
449
- 0.033667072296142575,
450
- 0.033514495849609374,
451
- 0.03357593536376953,
452
- 0.033729534149169925,
453
- 0.033511425018310545,
454
- 0.03358617782592774,
455
- 0.03355033493041992,
456
- 0.033583263397216796,
457
- 0.033570655822753905,
458
- 0.03360358428955078,
459
- 0.03356364822387695,
460
- 0.03360665512084961,
461
- 0.03374899291992187,
462
- 0.033718273162841796,
463
- 0.03359743881225586,
464
- 0.033584129333496096,
465
- 0.03356364822387695,
466
- 0.0336629753112793,
467
- 0.033620990753173825,
468
- 0.033919998168945316,
469
- 0.03372768020629883,
470
- 0.033554271697998045,
471
- 0.03358832168579102,
472
- 0.03361881637573242,
473
- 0.03367833709716797,
474
- 0.03359027099609375,
475
- 0.03363532638549805,
476
- 0.03375001525878906,
477
- 0.03360873413085937,
478
- 0.033424510955810546,
479
- 0.03377056121826172,
480
- 0.03363510513305664,
481
- 0.033546241760253906,
482
- 0.03377356719970703,
483
- 0.033805313110351565,
484
- 0.03364147186279297,
485
- 0.033600608825683595,
486
- 0.03353180694580078,
487
- 0.0335912971496582,
488
- 0.03360768127441406,
489
- 0.03358415985107422,
490
- 0.03361276626586914,
491
- 0.03357900619506836,
492
- 0.03366912078857422,
493
- 0.03369267272949219,
494
- 0.033539070129394534,
495
- 0.033519615173339845,
496
- 0.033642494201660156,
497
- 0.03354019165039063,
498
- 0.033525665283203124,
499
- 0.03361711883544922,
500
- 0.03361974334716797,
501
- 0.03349724960327148,
502
- 0.033764190673828125,
503
- 0.0334837760925293,
504
- 0.033565696716308595,
505
- 0.03359027099609375,
506
- 0.033501182556152344,
507
- 0.033549312591552735,
508
- 0.03354111862182617,
509
- 0.03348787307739258,
510
- 0.033649665832519535,
511
- 0.03352371215820313,
512
- 0.03361075210571289,
513
- 0.03359539031982422,
514
- 0.033685504913330076,
515
- 0.03357491302490234,
516
- 0.03379097747802735,
517
- 0.03350630569458008,
518
- 0.033620990753173825,
519
- 0.03346944046020508,
520
- 0.033530048370361325,
521
- 0.03350713729858398,
522
- 0.0335022087097168,
523
- 0.03343974304199219,
524
- 0.0335912971496582,
525
- 0.03343462371826172,
526
- 0.03355955123901367,
527
- 0.03342131042480469,
528
- 0.0340316162109375,
529
- 0.033539070129394534,
530
- 0.03346944046020508,
531
- 0.03349606323242187,
532
- 0.03350425720214844,
533
- 0.033500160217285156,
534
- 0.03361280059814453,
535
- 0.033909759521484374,
536
- 0.033649665832519535,
537
- 0.03361996841430664,
538
- 0.03359231948852539,
539
- 0.033546241760253906,
540
- 0.0335206413269043,
541
- 0.0335994873046875,
542
- 0.033827838897705076,
543
- 0.03380326461791992,
544
- 0.03357900619506836,
545
- 0.033546432495117184,
546
- 0.033893375396728515,
547
- 0.033667072296142575,
548
- 0.033667072296142575,
549
- 0.03363942337036133,
550
- 0.033555454254150394,
551
- 0.033598464965820314,
552
- 0.033509407043457035,
553
- 0.03363529586791992,
554
- 0.033582080841064454,
555
- 0.03367424011230469,
556
- 0.03362303924560547,
557
- 0.03373875045776367,
558
- 0.033576961517333984,
559
- 0.033649761199951174,
560
- 0.03349187088012695,
561
- 0.03364863967895508,
562
- 0.033584320068359375,
563
- 0.033551166534423825,
564
- 0.03348787307739258,
565
- 0.033719551086425784,
566
- 0.03360128021240234,
567
- 0.03377151870727539,
568
- 0.03352492904663086,
569
- 0.03367200088500977,
570
- 0.03359231948852539,
571
- 0.03360255813598633,
572
- 0.03355648040771484,
573
- 0.03363532638549805,
574
- 0.033584129333496096,
575
- 0.033532928466796875,
576
- 0.03352268981933594,
577
- 0.033827838897705076,
578
- 0.03350630569458008,
579
- 0.0335728645324707,
580
- 0.033484798431396484,
581
- 0.03364556884765625,
582
- 0.033462272644042966,
583
- 0.03363225555419922,
584
- 0.033565696716308595,
585
- 0.033704959869384765,
586
- 0.03361894226074219,
587
- 0.034223102569580076,
588
- 0.03355340957641602,
589
- 0.03362835311889648,
590
- 0.03361260986328125,
591
- 0.033585151672363284,
592
- 0.03353811264038086,
593
- 0.033615806579589846,
594
- 0.03343564987182617,
595
- 0.03364044952392578,
596
- 0.0336363525390625,
597
- 0.033729534149169925,
598
- 0.03359539031982422,
599
- 0.033632320404052736,
600
- 0.03352467346191406,
601
- 0.03403673553466797,
602
- 0.03348889541625977,
603
- 0.03359334564208984,
604
- 0.033519615173339845,
605
- 0.03355257415771484,
606
- 0.033492801666259765,
607
- 0.03364371109008789,
608
- 0.033594303131103516,
609
- 0.03369868850708008,
610
- 0.03351859283447266,
611
- 0.03353190231323242,
612
- 0.0335206413269043,
613
- 0.033588222503662106,
614
- 0.03347763061523437,
615
- 0.03348992156982422,
616
- 0.03362815856933594,
617
- 0.03365692901611328,
618
- 0.03376630401611328,
619
- 0.03358003234863281,
620
- 0.03367628860473633,
621
- 0.033670143127441404,
622
- 0.033584129333496096,
623
- 0.03367628860473633,
624
- 0.033584129333496096,
625
- 0.03358003234863281,
626
- 0.03368960189819336,
627
- 0.03364863967895508,
628
- 0.0335206413269043,
629
- 0.033588222503662106,
630
- 0.033568767547607424,
631
- 0.03355459213256836,
632
- 0.033698654174804686,
633
- 0.03367424011230469,
634
- 0.03360563278198242,
635
- 0.033555454254150394,
636
- 0.03350735855102539,
637
- 0.033648609161376956,
638
- 0.03350732803344727,
639
- 0.03362918472290039,
640
- 0.033598464965820314,
641
- 0.03358310317993164,
642
- 0.033928192138671875,
643
- 0.03349923324584961,
644
- 0.03360348892211914,
645
- 0.0335728645324707,
646
- 0.03362201690673828,
647
- 0.03391795349121094,
648
- 0.033555454254150394,
649
- 0.03356159973144531,
650
- 0.0336814079284668,
651
- 0.033651744842529294,
652
- 0.03361497497558594,
653
- 0.0335450553894043,
654
- 0.03351580810546875,
655
- 0.03373846435546875,
656
- 0.0335810546875,
657
- 0.033465343475341795,
658
- 0.03353702545166016,
659
- 0.0335013427734375,
660
- 0.03357782363891602,
661
- 0.03403571319580078,
662
- 0.033627166748046874,
663
- 0.03364767837524414,
664
- 0.03356047821044922,
665
- 0.033729534149169925,
666
- 0.03368054580688477,
667
- 0.033608543395996095,
668
- 0.033582080841064454,
669
- 0.03363532638549805,
670
- 0.03355955123901367,
671
- 0.03358310317993164,
672
- 0.03362508773803711,
673
- 0.03377766418457031,
674
- 0.03361820983886719,
675
- 0.033477344512939454,
676
- 0.033567745208740236,
677
- 0.03341033554077148,
678
- 0.03353673553466797,
679
- 0.03342131042480469,
680
- 0.0334837760925293,
681
- 0.03346636962890625,
682
- 0.03382601547241211,
683
- 0.033547039031982424,
684
- 0.033486881256103516,
685
- 0.03350012969970703,
686
- 0.03364147186279297,
687
- 0.03338751983642578,
688
- 0.033544193267822264,
689
- 0.03377356719970703,
690
- 0.03358617782592774,
691
- 0.03361996841430664,
692
- 0.03342745590209961,
693
- 0.03360358428955078,
694
- 0.03361587142944336,
695
- 0.0336732177734375,
696
- 0.03346432113647461,
697
- 0.033535999298095705,
698
- 0.033462272644042966,
699
- 0.03345817565917969,
700
- 0.033616897583007815,
701
- 0.033699840545654294,
702
- 0.03346124649047852,
703
- 0.03354009628295898,
704
- 0.033508350372314456,
705
- 0.03351248168945312,
706
- 0.033476638793945315,
707
- 0.03420975875854492,
708
- 0.03355542373657226,
709
- 0.0336097297668457,
710
- 0.03361177444458008,
711
- 0.03355033493041992,
712
- 0.033569950103759766,
713
- 0.03987760162353516,
714
- 0.039078048706054684,
715
- 0.03925167846679688,
716
- 0.0383559684753418,
717
- 0.038001953125,
718
- 0.03823791885375977,
719
- 0.03890585708618164,
720
- 0.03825151824951172,
721
- 0.03803955078125,
722
- 0.03559936141967773,
723
- 0.03567308807373047,
724
- 0.03725107192993164,
725
- 0.03808870315551758,
726
- 0.036789249420166016,
727
- 0.03564748764038086,
728
- 0.03366937637329102,
729
- 0.03364134216308594,
730
- 0.03357171249389648,
731
- 0.033396736145019534,
732
- 0.03352883148193359,
733
- 0.033667072296142575,
734
- 0.03363020706176758,
735
- 0.033617919921875,
736
- 0.033544193267822264,
737
- 0.033683456420898435,
738
- 0.03356979370117188,
739
- 0.03340508651733398,
740
- 0.03358294296264648,
741
- 0.033942527770996093,
742
- 0.03343769454956055,
743
- 0.03358924865722656,
744
- 0.03358323287963867,
745
- 0.03440729522705078,
746
- 0.03371212768554688,
747
- 0.03364761734008789,
748
- 0.03349932861328125,
749
- 0.033621822357177734,
750
- 0.033546241760253906,
751
- 0.03354217529296875,
752
- 0.03358307266235352,
753
- 0.03364863967895508,
754
- 0.03382089614868164,
755
- 0.0336146240234375,
756
- 0.03368960189819336,
757
- 0.03351039886474609,
758
- 0.03358937454223633,
759
- 0.03356659317016602,
760
- 0.03346124649047852,
761
- 0.03352166366577149,
762
- 0.03363532638549805,
763
- 0.03348787307739258,
764
- 0.033537311553955076,
765
- 0.03388079833984375,
766
- 0.033529857635498046,
767
- 0.033588222503662106,
768
- 0.03347251129150391,
769
- 0.03355875015258789,
770
- 0.03351836776733398,
771
- 0.03363020706176758,
772
- 0.033732769012451175,
773
- 0.03356143951416016,
774
- 0.03349094390869141,
775
- 0.03346022415161133,
776
- 0.03352390289306641,
777
- 0.033569599151611326,
778
- 0.033637374877929685,
779
- 0.034272254943847655,
780
- 0.0335810546875,
781
- 0.03355648040771484,
782
- 0.033538047790527346,
783
- 0.03346636962890625,
784
- 0.03358924865722656,
785
- 0.03346636962890625,
786
- 0.033432575225830076,
787
- 0.033492992401123044,
788
- 0.033538238525390625,
789
- 0.03342623901367187,
790
- 0.03347251129150391,
791
- 0.033470462799072266,
792
- 0.03345510482788086,
793
- 0.033585151672363284,
794
- 0.03352166366577149,
795
- 0.03354745483398437,
796
- 0.03356550216674805,
797
- 0.033538047790527346,
798
- 0.033567745208740236,
799
- 0.03365683364868164,
800
- 0.03360665512084961
801
  ]
802
  },
803
  "throughput": {
804
  "unit": "tokens/s",
805
- "value": 29.653246203547816
806
  },
807
  "energy": null,
808
  "efficiency": null
 
2
  "prefill": {
3
  "memory": {
4
  "unit": "MB",
5
+ "max_ram": 1045.4016,
6
  "max_vram": 4905.238528,
7
  "max_reserved": 4420.796416,
8
  "max_allocated": 4252.667392
9
  },
10
  "latency": {
11
  "unit": "s",
12
+ "mean": 0.04986847159162686,
13
+ "stdev": 0.0011566518505120138,
14
  "values": [
15
+ 0.06372921752929687,
16
+ 0.050318336486816405,
17
+ 0.05015654373168945,
18
+ 0.050157569885253904,
19
+ 0.05006243133544922,
20
+ 0.05029171371459961,
21
+ 0.05021900939941406,
22
+ 0.05005311965942383,
23
+ 0.05022617721557617,
24
+ 0.050337791442871094,
25
+ 0.050283519744873044,
26
+ 0.05015961456298828,
27
+ 0.050119678497314454,
28
+ 0.04968172836303711,
29
+ 0.0495912971496582,
30
+ 0.049530879974365234,
31
+ 0.0496445426940918,
32
+ 0.04979507064819336,
33
+ 0.04986368179321289,
34
+ 0.049704959869384766,
35
+ 0.04969369506835938,
36
+ 0.04954531097412109,
37
+ 0.04972544097900391,
38
+ 0.04965990447998047,
39
+ 0.04981964874267578,
40
+ 0.04965580749511719,
41
+ 0.0495810546875,
42
+ 0.04957593536376953,
43
+ 0.049568767547607424,
44
+ 0.04948070526123047,
45
+ 0.04966912078857422,
46
+ 0.04964044952392578,
47
+ 0.04959743881225586,
48
+ 0.04952371215820312,
49
+ 0.0494919662475586,
50
+ 0.04972032165527344,
51
+ 0.049478656768798826,
52
+ 0.04960768127441406,
53
+ 0.049630496978759764,
54
+ 0.04959539031982422,
55
+ 0.0495728645324707,
56
+ 0.04966604614257813,
57
+ 0.04960563278198242,
58
+ 0.04958720016479492,
59
+ 0.04958310317993164,
60
+ 0.04952166366577149,
61
+ 0.049805313110351565,
62
+ 0.04962815856933594,
63
+ 0.04951859283447266,
64
+ 0.04953417587280273,
65
+ 0.04953497695922852,
66
+ 0.049495040893554686,
67
+ 0.049380352020263675,
68
+ 0.04953395080566406,
69
+ 0.04948070526123047,
70
+ 0.049683456420898435,
71
+ 0.04967628860473633,
72
+ 0.049484798431396484,
73
+ 0.049498111724853515,
74
+ 0.049632545471191405,
75
+ 0.049501182556152344,
76
+ 0.0494919662475586,
77
+ 0.04954316711425781,
78
+ 0.04963840103149414,
79
+ 0.04952473449707031,
80
+ 0.049442817687988284,
81
+ 0.0494837760925293,
82
+ 0.04953497695922852,
83
+ 0.049634304046630856,
84
+ 0.049438720703125,
85
+ 0.0496517105102539,
86
+ 0.04968038558959961,
87
+ 0.04953804779052735,
88
+ 0.049430526733398435,
89
+ 0.04953804779052735,
90
+ 0.04955136108398438,
91
+ 0.04950425720214844,
92
+ 0.04949401473999023,
93
+ 0.04981145477294922,
94
+ 0.05230284881591797,
95
+ 0.05170380783081055,
96
+ 0.049993824005126954,
97
+ 0.04965990447998047,
98
+ 0.0496363525390625,
99
+ 0.049870849609375,
100
+ 0.050348033905029295,
101
+ 0.05045248031616211,
102
+ 0.05002035140991211,
103
+ 0.04998246383666992,
104
+ 0.049729534149169925,
105
+ 0.049979393005371096,
106
+ 0.049870849609375,
107
+ 0.050083839416503906,
108
+ 0.049840225219726565,
109
+ 0.04976332855224609,
110
+ 0.04979916763305664,
111
+ 0.04975513458251953,
112
+ 0.049710079193115236,
113
+ 0.0497520637512207,
114
+ 0.04974796676635742,
115
+ 0.04954009628295898,
116
+ 0.04965580749511719,
117
+ 0.04993228912353516,
118
+ 0.04983091354370117,
119
+ 0.04981964874267578,
120
+ 0.04952473449707031,
121
+ 0.04956371307373047,
122
+ 0.04962918472290039,
123
+ 0.049549407958984375,
124
+ 0.04975027084350586,
125
+ 0.049658878326416016,
126
+ 0.049710079193115236,
127
+ 0.04968447875976562,
128
+ 0.04983295822143555,
129
+ 0.049734657287597656,
130
+ 0.04956159973144531,
131
+ 0.04963942337036133,
132
+ 0.049600513458251956,
133
+ 0.04993843078613281,
134
+ 0.04988313674926758,
135
+ 0.049999870300292966,
136
+ 0.04972159957885742,
137
+ 0.049855487823486325,
138
+ 0.04960870361328125,
139
+ 0.04965785598754883,
140
+ 0.04967935943603516,
141
+ 0.049871871948242184,
142
+ 0.04967116928100586,
143
+ 0.049896446228027344,
144
+ 0.049646656036376954,
145
+ 0.049563648223876954,
146
+ 0.04984934234619141,
147
+ 0.049686527252197264,
148
+ 0.04962201690673828,
149
+ 0.04972339248657227,
150
+ 0.04955344009399414,
151
+ 0.049516544342041016,
152
+ 0.049552383422851565,
153
+ 0.049786880493164064,
154
+ 0.049721343994140625,
155
+ 0.04983705520629883,
156
+ 0.0496732177734375,
157
+ 0.049582080841064455,
158
+ 0.049809406280517575,
159
+ 0.04988131332397461,
160
+ 0.04965478515625,
161
+ 0.0496363525390625,
162
+ 0.04957491302490234,
163
+ 0.049759231567382815,
164
+ 0.04960255813598633,
165
+ 0.049631233215332034,
166
+ 0.04969267272949219,
167
+ 0.049696769714355465,
168
+ 0.049721343994140625,
169
+ 0.04980428695678711,
170
+ 0.049719295501708984,
171
+ 0.049764575958251955,
172
+ 0.04968972778320312,
173
+ 0.049874942779541014,
174
+ 0.04972032165527344,
175
+ 0.04964044952392578,
176
+ 0.057373695373535157,
177
+ 0.05018009567260742,
178
+ 0.04990771102905273,
179
+ 0.049941505432128906,
180
+ 0.04962508773803711,
181
+ 0.049737728118896485,
182
+ 0.049772544860839846,
183
+ 0.049838081359863284,
184
+ 0.04980121612548828,
185
+ 0.04977356719970703,
186
+ 0.04972032165527344,
187
+ 0.049772544860839846,
188
+ 0.04985958480834961,
189
+ 0.0496732177734375,
190
+ 0.049724414825439454,
191
+ 0.04978585433959961,
192
+ 0.04962815856933594,
193
+ 0.049896446228027344,
194
+ 0.049821697235107425,
195
+ 0.04977766418457031,
196
+ 0.04973158264160156,
197
+ 0.049896446228027344,
198
+ 0.049999870300292966,
199
+ 0.049923072814941405,
200
+ 0.04960870361328125,
201
+ 0.04982681655883789,
202
+ 0.04998246383666992,
203
+ 0.05000396728515625,
204
+ 0.04973056030273437,
205
+ 0.04984320068359375,
206
+ 0.04986368179321289,
207
+ 0.04997017669677734,
208
+ 0.04998368072509766,
209
+ 0.049842174530029294,
210
+ 0.04969574356079102,
211
+ 0.04971110534667969,
212
+ 0.04972851181030274,
213
+ 0.049906688690185545,
214
+ 0.04969267272949219,
215
+ 0.04977151870727539
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  ]
217
  },
218
  "throughput": {
219
  "unit": "tokens/s",
220
+ "value": 5133.504032294896
221
  },
222
  "energy": null,
223
  "efficiency": null
 
225
  "decode": {
226
  "memory": {
227
  "unit": "MB",
228
+ "max_ram": 1045.4016,
229
  "max_vram": 5255.462912,
230
  "max_reserved": 4771.0208,
231
  "max_allocated": 4715.61216
232
  },
233
  "latency": {
234
  "unit": "s",
235
+ "mean": 10.25303860855103,
236
+ "stdev": 0,
237
  "values": [
238
+ 10.25303860855103
 
239
  ]
240
  },
241
  "throughput": {
242
  "unit": "tokens/s",
243
+ "value": 24.87067587820552
244
  },
245
  "energy": null,
246
  "efficiency": null
 
249
  "memory": null,
250
  "latency": {
251
  "unit": "s",
252
+ "mean": 0.04020799454333737,
253
+ "stdev": 0.00017124566649101715,
254
  "values": [
255
+ 0.039923934936523436,
256
+ 0.040948513031005856,
257
+ 0.04051865768432617,
258
+ 0.04043775939941406,
259
+ 0.040342529296875,
260
+ 0.04033436965942383,
261
+ 0.04048175811767578,
262
+ 0.04037017440795899,
263
+ 0.04020870590209961,
264
+ 0.04028691101074219,
265
+ 0.04045619201660156,
266
+ 0.040343551635742186,
267
+ 0.04041017532348633,
268
+ 0.040244159698486326,
269
+ 0.040269824981689455,
270
+ 0.040594432830810545,
271
+ 0.040237056732177735,
272
+ 0.040474624633789064,
273
+ 0.04022272109985352,
274
+ 0.040204288482666016,
275
+ 0.040153087615966795,
276
+ 0.040342529296875,
277
+ 0.04022988891601562,
278
+ 0.04024422454833984,
279
+ 0.040304641723632816,
280
+ 0.04038348770141602,
281
+ 0.040234081268310545,
282
+ 0.04039158248901367,
283
+ 0.04066611099243164,
284
+ 0.040581119537353515,
285
+ 0.04032716751098633,
286
+ 0.040253440856933595,
287
+ 0.04037734222412109,
288
+ 0.040180736541748044,
289
+ 0.0404398078918457,
290
+ 0.040289279937744144,
291
+ 0.04041318511962891,
292
+ 0.04029337692260742,
293
+ 0.04023091125488281,
294
+ 0.040081409454345705,
295
+ 0.04011315155029297,
296
+ 0.040172542572021484,
297
+ 0.04013875198364258,
298
+ 0.04035686492919922,
299
+ 0.04005580902099609,
300
+ 0.03999859237670898,
301
+ 0.04017139053344727,
302
+ 0.04060671997070313,
303
+ 0.0403507194519043,
304
+ 0.040190975189208986,
305
+ 0.04020326232910156,
306
+ 0.04023199844360351,
307
+ 0.040374401092529294,
308
+ 0.040358718872070314,
309
+ 0.04038143920898438,
310
+ 0.04029158401489258,
311
+ 0.04018457412719727,
312
+ 0.04033331298828125,
313
+ 0.04027084732055664,
314
+ 0.040046592712402344,
315
+ 0.040395774841308595,
316
+ 0.04029644775390625,
317
+ 0.040248321533203124,
318
+ 0.040343551635742186,
319
+ 0.04049407958984375,
320
+ 0.04038143920898438,
321
+ 0.040129791259765624,
322
+ 0.040266494750976566,
323
+ 0.0402790412902832,
324
+ 0.040226814270019534,
325
+ 0.04034457778930664,
326
+ 0.04031488037109375,
327
+ 0.040420352935791014,
328
+ 0.04042444610595703,
329
+ 0.04033740615844727,
330
+ 0.04039680099487305,
331
+ 0.040578048706054685,
332
+ 0.04048076629638672,
333
+ 0.040205310821533204,
334
+ 0.04034457778930664,
335
+ 0.04025753784179688,
336
+ 0.04014796829223633,
337
+ 0.04016537475585937,
338
+ 0.040197120666503904,
339
+ 0.040000511169433595,
340
+ 0.04039680099487305,
341
+ 0.040169471740722655,
342
+ 0.04035276794433594,
343
+ 0.040400894165039065,
344
+ 0.04012953567504883,
345
+ 0.04007628631591797,
346
+ 0.040217601776123046,
347
+ 0.040114177703857425,
348
+ 0.04005990219116211,
349
+ 0.04008652877807617,
350
+ 0.0403056640625,
351
+ 0.040436992645263674,
352
+ 0.04030643081665039,
353
+ 0.04026675033569336,
354
+ 0.04036710357666016,
355
+ 0.03996876907348633,
356
+ 0.040243198394775394,
357
+ 0.040444927215576174,
358
+ 0.04007628631591797,
359
+ 0.040085792541503906,
360
+ 0.039993057250976564,
361
+ 0.04015718460083008,
362
+ 0.04008038330078125,
363
+ 0.039965694427490234,
364
+ 0.04016128158569336,
365
+ 0.04013363265991211,
366
+ 0.04014899063110351,
367
+ 0.04012543869018555,
368
+ 0.04005068969726563,
369
+ 0.040610912322998044,
370
+ 0.040493984222412106,
371
+ 0.04039884948730469,
372
+ 0.04029644775390625,
373
+ 0.040172542572021484,
374
+ 0.0400711669921875,
375
+ 0.04024524688720703,
376
+ 0.04021964645385742,
377
+ 0.04020444869995117,
378
+ 0.04001571273803711,
379
+ 0.04008774566650391,
380
+ 0.03995014572143555,
381
+ 0.040188926696777344,
382
+ 0.04022476959228516,
383
+ 0.04027084732055664,
384
+ 0.04010598373413086,
385
+ 0.040025089263916014,
386
+ 0.04022886276245117,
387
+ 0.04034969711303711,
388
+ 0.040180736541748044,
389
+ 0.040286209106445314,
390
+ 0.040130561828613284,
391
+ 0.040360958099365234,
392
+ 0.04036198425292969,
393
+ 0.04007731246948242,
394
+ 0.04023091125488281,
395
+ 0.04017356872558594,
396
+ 0.0401868782043457,
397
+ 0.04005887985229492,
398
+ 0.040018943786621096,
399
+ 0.03984384155273438,
400
+ 0.04030361557006836,
401
+ 0.04027494430541992,
402
+ 0.040049663543701174,
403
+ 0.04029439926147461,
404
+ 0.040202239990234374,
405
+ 0.040180736541748044,
406
+ 0.03995750427246094,
407
+ 0.0402503662109375,
408
+ 0.04022169494628906,
409
+ 0.04030681610107422,
410
+ 0.04034342575073242,
411
+ 0.040220672607421876,
412
+ 0.04028108978271484,
413
+ 0.04012748718261719,
414
+ 0.04002304077148437,
415
+ 0.04020121765136719,
416
+ 0.04022988891601562,
417
+ 0.040414207458496096,
418
+ 0.040390655517578124,
419
+ 0.040197246551513674,
420
+ 0.04011916732788086,
421
+ 0.04021452713012695,
422
+ 0.040158206939697266,
423
+ 0.04013875198364258,
424
+ 0.04044595336914063,
425
+ 0.04072550582885742,
426
+ 0.04021350479125976,
427
+ 0.040597503662109374,
428
+ 0.04010905456542969,
429
+ 0.04013772964477539,
430
+ 0.040018943786621096,
431
+ 0.040256511688232424,
432
+ 0.04033740615844727,
433
+ 0.040199169158935545,
434
+ 0.04010700988769531,
435
+ 0.04024524688720703,
436
+ 0.0403917121887207,
437
+ 0.04025955200195312,
438
+ 0.04010086441040039,
439
+ 0.04028108978271484,
440
+ 0.04032819366455078,
441
+ 0.040325119018554685,
442
+ 0.040068096160888675,
443
+ 0.03986227035522461,
444
+ 0.03998822402954102,
445
+ 0.040376319885253906,
446
+ 0.0403240966796875,
447
+ 0.04001587295532227,
448
+ 0.04003635025024414,
449
+ 0.0401448974609375,
450
+ 0.04009881591796875,
451
+ 0.04043571090698242,
452
+ 0.04011929702758789,
453
+ 0.04024115371704102,
454
+ 0.04000665664672851,
455
+ 0.04009062576293945,
456
+ 0.039997440338134765,
457
+ 0.03993907165527344,
458
+ 0.03988479995727539,
459
+ 0.0398837776184082,
460
+ 0.04017663955688477,
461
+ 0.039981056213378906,
462
+ 0.040010753631591796,
463
+ 0.04002304077148437,
464
+ 0.040207359313964845,
465
+ 0.040041473388671874,
466
+ 0.04018380737304687,
467
+ 0.04019200134277344,
468
+ 0.04014387130737305,
469
+ 0.04008038330078125,
470
+ 0.040008705139160154,
471
+ 0.03998720169067383,
472
+ 0.04004249572753906,
473
+ 0.03994214248657227,
474
+ 0.04009574508666992,
475
+ 0.04006195068359375,
476
+ 0.04002304077148437,
477
+ 0.04005068969726563,
478
+ 0.04020121765136719,
479
+ 0.039962623596191404,
480
+ 0.04037017440795899,
481
+ 0.04046131134033203,
482
+ 0.04007014465332031,
483
+ 0.04009062576293945,
484
+ 0.040123390197753905,
485
+ 0.04011520004272461,
486
+ 0.04015206527709961,
487
+ 0.04003839874267578,
488
+ 0.03986022567749024,
489
+ 0.040139774322509765,
490
+ 0.04010700988769531,
491
+ 0.040035327911376956,
492
+ 0.040043521881103515,
493
+ 0.039929855346679685,
494
+ 0.03989503860473633,
495
+ 0.0399738883972168,
496
+ 0.04001177597045898,
497
+ 0.039962623596191404,
498
+ 0.04008857727050781,
499
+ 0.04000153732299805,
500
+ 0.0401162223815918,
501
+ 0.04003635025024414,
502
+ 0.04026777648925781,
503
+ 0.04012953567504883,
504
+ 0.04001587295532227,
505
+ 0.040103935241699216,
506
+ 0.040013824462890625,
507
+ 0.04007526397705078,
508
+ 0.03989606475830078,
509
+ 0.04017049789428711
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
510
  ]
511
  },
512
  "throughput": {
513
  "unit": "tokens/s",
514
+ "value": 24.87067587820552
515
  },
516
  "energy": null,
517
  "efficiency": null