diff --git "a/runs.json" "b/runs.json" new file mode 100644--- /dev/null +++ "b/runs.json" @@ -0,0 +1,2438 @@ +[ + { + "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", + "task": "token-classification", + "task_args": null, + "dataset": { + "path": "conll2003", + "eval_split": "validation", + "data_keys": { + "primary": "tokens", + "secondary": null + }, + "ref_keys": [ + "ner_tags" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "static", + "operators_to_quantize": [ + "Add", + "MatMul" + ], + "node_exclusion": [], + "aware_training": false, + "per_channel": false, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.670\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 40, + "throughput": 2.67, + "latency_mean": 375.10382185000003, + "latency_std": 41.77851887343237, + "latency_50": 402.085788, + "latency_90": 405.6853881, + "latency_95": 406.5309876, + "latency_99": 407.68657795999997, + "latency_999": 407.928640196 + }, + "optimized": { + "nb_forwards": 67, + "throughput": 4.47, + "latency_mean": 225.09343717910448, + "latency_std": 5.4422008927739745, + "latency_50": 224.468437, + "latency_90": 233.1922012, + "latency_95": 234.06977830000002, + "latency_99": 238.47171616, + "latency_999": 239.621179816 + } + }, + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 226, + "throughput": 15.07, + "latency_mean": 66.56901730973452, + "latency_std": 7.513087298346711, + "latency_50": 71.2563305, + "latency_90": 72.6457545, + "latency_95": 72.8911035, + "latency_99": 73.828916, + "latency_999": 74.4237675 + }, + "optimized": { + "nb_forwards": 436, + "throughput": 29.07, + "latency_mean": 34.45165110091743, + "latency_std": 1.0538553485588218, + "latency_50": 34.582433, + "latency_90": 34.9882515, + "latency_95": 35.17064425, + "latency_99": 35.965547449999995, + "latency_999": 38.29062908499999 + } + }, + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 126, + "throughput": 8.4, + "latency_mean": 119.99883396031747, + "latency_std": 0.5389393608027111, + "latency_50": 119.924409, + "latency_90": 120.578213, + "latency_95": 120.87888125, + "latency_99": 121.94754125, + "latency_999": 122.377029 + }, + "optimized": { + "nb_forwards": 260, + "throughput": 17.33, + "latency_mean": 57.854139219230774, + "latency_std": 2.41471729691721, + "latency_50": 57.400247, + "latency_90": 61.3181592, + "latency_95": 62.38365544999999, + "latency_99": 64.46206894, + "latency_999": 65.22337084999998 + } + }, + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 348, + "throughput": 23.2, + "latency_mean": 43.22926725, + "latency_std": 1.1946103356932485, + "latency_50": 43.0492745, + "latency_90": 44.705008299999996, + "latency_95": 45.636852299999994, + "latency_99": 46.63035402, + "latency_999": 46.997215917999995 + }, + "optimized": { + "nb_forwards": 789, + "throughput": 52.6, + "latency_mean": 19.02440978073511, + "latency_std": 1.1428706688800712, + "latency_50": 18.854472, + "latency_90": 20.772688400000003, + "latency_95": 21.339546999999996, + "latency_99": 21.90764228, + "latency_999": 22.18167628399999 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 204, + "throughput": 13.6, + "latency_mean": 73.54620642647059, + "latency_std": 1.573277335192714, + "latency_50": 73.3753375, + "latency_90": 75.5904331, + "latency_95": 76.15987129999999, + "latency_99": 78.56596293, + "latency_999": 80.276804319 + }, + "optimized": { + "nb_forwards": 396, + "throughput": 26.4, + "latency_mean": 37.94637206818182, + "latency_std": 0.22863560761755683, + "latency_50": 37.917286, + "latency_90": 38.2418925, + "latency_95": 38.31172375, + "latency_99": 38.762135050000005, + "latency_999": 38.947024510000006 + } + }, + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 70, + "throughput": 4.67, + "latency_mean": 215.9852832857143, + "latency_std": 2.6695382982970624, + "latency_50": 215.4558835, + "latency_90": 219.254347, + "latency_95": 219.83214235, + "latency_99": 222.17290862000002, + "latency_999": 222.546938162 + }, + "optimized": { + "nb_forwards": 116, + "throughput": 7.73, + "latency_mean": 130.388671, + "latency_std": 0.9538270722224035, + "latency_50": 130.420778, + "latency_90": 131.4585225, + "latency_95": 131.97905225, + "latency_99": 133.03534015, + "latency_999": 133.14044914 + } + }, + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 68, + "throughput": 4.53, + "latency_mean": 222.9005361617647, + "latency_std": 2.9786389600252616, + "latency_50": 222.130681, + "latency_90": 226.3922357, + "latency_95": 227.17735725, + "latency_99": 231.35961219, + "latency_999": 232.82185281900001 + }, + "optimized": { + "nb_forwards": 112, + "throughput": 7.47, + "latency_mean": 135.05900691964285, + "latency_std": 0.852188728183432, + "latency_50": 135.0183545, + "latency_90": 136.08118190000002, + "latency_95": 136.6358585, + "latency_99": 137.6042464, + "latency_999": 137.777202544 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 127, + "throughput": 8.47, + "latency_mean": 118.38734347244095, + "latency_std": 3.928549187092105, + "latency_50": 118.722872, + "latency_90": 119.61111340000001, + "latency_95": 120.56405670000001, + "latency_99": 122.34993956, + "latency_999": 122.379941486 + }, + "optimized": { + "nb_forwards": 233, + "throughput": 15.53, + "latency_mean": 64.56411339484978, + "latency_std": 0.8101063203434803, + "latency_50": 64.369473, + "latency_90": 65.578687, + "latency_95": 66.343236, + "latency_99": 67.23023495999999, + "latency_999": 67.842266136 + } + }, + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 314, + "throughput": 20.93, + "latency_mean": 47.871883372611464, + "latency_std": 0.8675741645315053, + "latency_50": 47.86284, + "latency_90": 48.8556855, + "latency_95": 49.34610644999999, + "latency_99": 50.61812615, + "latency_999": 50.940127244 + }, + "optimized": { + "nb_forwards": 1419, + "throughput": 94.6, + "latency_mean": 10.575771353770262, + "latency_std": 0.6992989868391869, + "latency_50": 10.433279, + "latency_90": 11.4260774, + "latency_95": 12.119523699999998, + "latency_99": 12.74776788, + "latency_999": 13.092563020000012 + } + } + ], + "others": { + "baseline": { + "precision": 0.9358012339503085, + "recall": 0.9444631437226523, + "f1": 0.9401122372057961, + "accuracy": 0.9882013940267124 + }, + "optimized": { + "precision": 0.06543578604398588, + "recall": 0.24335240659710536, + "f1": 0.10313837375178317, + "accuracy": 0.35697597445582335 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "distilbert" + }, + { + "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", + "task": "token-classification", + "task_args": null, + "dataset": { + "path": "conll2003", + "eval_split": "validation", + "data_keys": { + "primary": "tokens", + "secondary": null + }, + "ref_keys": [ + "ner_tags" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "static", + "operators_to_quantize": [ + "Add", + "MatMul" + ], + "node_exclusion": [ + "layernorm", + "gelu", + "residual", + "gather", + "softmax" + ], + "aware_training": false, + "per_channel": false, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3105.038\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 315, + "throughput": 21.0, + "latency_mean": 47.722406904761904, + "latency_std": 0.78575656702049, + "latency_50": 47.657137, + "latency_90": 48.539485, + "latency_95": 49.1951445, + "latency_99": 50.459615660000004, + "latency_999": 51.17226236 + }, + "optimized": { + "nb_forwards": 1829, + "throughput": 121.93, + "latency_mean": 8.204672595407327, + "latency_std": 0.6281598971222003, + "latency_50": 8.030814, + "latency_90": 9.035519800000001, + "latency_95": 9.1478804, + "latency_99": 9.59396184, + "latency_999": 10.840923620000012 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 140, + "throughput": 9.33, + "latency_mean": 107.46691665714286, + "latency_std": 14.217711617362141, + "latency_50": 118.404271, + "latency_90": 119.217932, + "latency_95": 120.1323987, + "latency_99": 121.88812750999999, + "latency_999": 122.23331556199999 + }, + "optimized": { + "nb_forwards": 336, + "throughput": 22.4, + "latency_mean": 44.742087273809524, + "latency_std": 1.297316432066614, + "latency_50": 44.5606265, + "latency_90": 46.6604945, + "latency_95": 47.15595925, + "latency_99": 47.76239855, + "latency_999": 48.352806460000004 + } + }, + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 69, + "throughput": 4.6, + "latency_mean": 220.25039004347826, + "latency_std": 1.5005813984244252, + "latency_50": 219.820796, + "latency_90": 222.1719386, + "latency_95": 224.2704662, + "latency_99": 225.16129407999998, + "latency_999": 225.809551408 + }, + "optimized": { + "nb_forwards": 160, + "throughput": 10.67, + "latency_mean": 94.04712534375, + "latency_std": 4.044440830749728, + "latency_50": 92.7247505, + "latency_90": 100.1824987, + "latency_95": 103.149005, + "latency_99": 106.80153862, + "latency_999": 107.010720374 + } + }, + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 251, + "throughput": 16.73, + "latency_mean": 59.98497594422311, + "latency_std": 1.3002235321937636, + "latency_50": 59.807255, + "latency_90": 61.620392, + "latency_95": 62.9347585, + "latency_99": 63.5797715, + "latency_999": 64.073255 + }, + "optimized": { + "nb_forwards": 933, + "throughput": 62.2, + "latency_mean": 16.080496909967845, + "latency_std": 0.6562666342873719, + "latency_50": 16.202643, + "latency_90": 16.5627894, + "latency_95": 16.7647474, + "latency_99": 17.10871436, + "latency_999": 17.232195299999997 + } + }, + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 271, + "throughput": 18.07, + "latency_mean": 55.44747836162362, + "latency_std": 1.1058764508579348, + "latency_50": 55.358138, + "latency_90": 56.887965, + "latency_95": 57.3142435, + "latency_99": 58.44232040000001, + "latency_999": 58.78495296 + }, + "optimized": { + "nb_forwards": 540, + "throughput": 36.0, + "latency_mean": 27.828701338888887, + "latency_std": 0.2830425296733134, + "latency_50": 27.783282, + "latency_90": 28.192628600000003, + "latency_95": 28.425469149999998, + "latency_99": 28.75255937, + "latency_999": 28.907412832000002 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 211, + "throughput": 14.07, + "latency_mean": 71.34490764454976, + "latency_std": 0.8606794549682575, + "latency_50": 71.218379, + "latency_90": 72.307616, + "latency_95": 72.7004855, + "latency_99": 73.44670070000001, + "latency_999": 76.77649755999997 + }, + "optimized": { + "nb_forwards": 473, + "throughput": 31.53, + "latency_mean": 31.754805171247355, + "latency_std": 0.4300285133323451, + "latency_50": 31.703291, + "latency_90": 32.3621104, + "latency_95": 32.6649158, + "latency_99": 33.04651476, + "latency_999": 33.134080912 + } + }, + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 125, + "throughput": 8.33, + "latency_mean": 120.569009496, + "latency_std": 0.8160222004383323, + "latency_50": 120.371051, + "latency_90": 121.2576836, + "latency_95": 122.412088, + "latency_99": 123.63907148, + "latency_999": 124.379808824 + }, + "optimized": { + "nb_forwards": 275, + "throughput": 18.33, + "latency_mean": 54.700663727272726, + "latency_std": 0.6805625823558532, + "latency_50": 54.534815, + "latency_90": 55.637867, + "latency_95": 56.2308853, + "latency_99": 57.06877728, + "latency_999": 57.400091958 + } + }, + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 45, + "throughput": 3.0, + "latency_mean": 337.56930973333334, + "latency_std": 42.73015832642524, + "latency_50": 311.940976, + "latency_90": 408.1195232, + "latency_95": 409.32610719999997, + "latency_99": 409.70182116, + "latency_999": 409.771442316 + }, + "optimized": { + "nb_forwards": 73, + "throughput": 4.87, + "latency_mean": 205.58670958904108, + "latency_std": 17.128151144285876, + "latency_50": 216.043924, + "latency_90": 219.5796966, + "latency_95": 221.6779058, + "latency_99": 224.22378336000003, + "latency_999": 224.454495336 + } + }, + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 70, + "throughput": 4.67, + "latency_mean": 215.1244777, + "latency_std": 1.555883759703903, + "latency_50": 214.6171285, + "latency_90": 216.7817589, + "latency_95": 218.67742330000002, + "latency_99": 220.89366772999998, + "latency_999": 221.10970307300002 + }, + "optimized": { + "nb_forwards": 173, + "throughput": 11.53, + "latency_mean": 86.89619405780347, + "latency_std": 2.3718385044122723, + "latency_50": 86.535865, + "latency_90": 90.37287020000001, + "latency_95": 91.8051702, + "latency_99": 92.75978772, + "latency_999": 92.817659772 + } + } + ], + "others": { + "baseline": { + "precision": 0.9358012339503085, + "recall": 0.9444631437226523, + "f1": 0.9401122372057961, + "accuracy": 0.9882013940267124 + }, + "optimized": { + "precision": 0.9038969616908851, + "recall": 0.9212386401884888, + "f1": 0.912485414235706, + "accuracy": 0.9842295860753086 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "distilbert" + }, + { + "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", + "task": "token-classification", + "task_args": null, + "dataset": { + "path": "conll2003", + "eval_split": "validation", + "data_keys": { + "primary": "tokens", + "secondary": null + }, + "ref_keys": [ + "ner_tags" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "dynamic", + "operators_to_quantize": [ + "Add", + "MatMul" + ], + "node_exclusion": [], + "aware_training": false, + "per_channel": false, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.033\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 167, + "throughput": 11.13, + "latency_mean": 90.0069231257485, + "latency_std": 1.7115040048180659, + "latency_50": 89.808392, + "latency_90": 92.1615498, + "latency_95": 93.00187689999998, + "latency_99": 94.87745056, + "latency_999": 96.02146185400001 + }, + "optimized": { + "nb_forwards": 347, + "throughput": 23.13, + "latency_mean": 43.265280452449566, + "latency_std": 2.7751472818818734, + "latency_50": 44.883167, + "latency_90": 45.35501180000001, + "latency_95": 46.0536215, + "latency_99": 46.73552832, + "latency_999": 46.9292037 + } + }, + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 311, + "throughput": 20.73, + "latency_mean": 48.2705229710611, + "latency_std": 1.1247628435592778, + "latency_50": 48.262398, + "latency_90": 49.675041, + "latency_95": 50.1826075, + "latency_99": 51.5223755, + "latency_999": 52.486126479999996 + }, + "optimized": { + "nb_forwards": 1955, + "throughput": 130.33, + "latency_mean": 7.675419680306905, + "latency_std": 0.3408992210776522, + "latency_50": 7.708051, + "latency_90": 7.9747872, + "latency_95": 8.22643, + "latency_99": 8.42614896, + "latency_999": 9.247490550000006 + } + }, + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 70, + "throughput": 4.67, + "latency_mean": 214.59417684285717, + "latency_std": 1.7664217544171172, + "latency_50": 214.2422325, + "latency_90": 216.45445769999998, + "latency_95": 218.51388645, + "latency_99": 220.63894481, + "latency_999": 221.498688881 + }, + "optimized": { + "nb_forwards": 171, + "throughput": 11.4, + "latency_mean": 87.8829150994152, + "latency_std": 0.9834434065965216, + "latency_50": 87.63179, + "latency_90": 88.639136, + "latency_95": 90.3700155, + "latency_99": 91.69530259999999, + "latency_999": 92.0263997 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 170, + "throughput": 11.33, + "latency_mean": 88.68199067647059, + "latency_std": 1.5268934268549699, + "latency_50": 88.7044375, + "latency_90": 90.6080309, + "latency_95": 91.1070495, + "latency_99": 92.72121441, + "latency_999": 94.157151444 + }, + "optimized": { + "nb_forwards": 347, + "throughput": 23.13, + "latency_mean": 43.326895181556196, + "latency_std": 0.4931306808409643, + "latency_50": 43.189003, + "latency_90": 44.0324068, + "latency_95": 44.3867949, + "latency_99": 45.111986800000004, + "latency_999": 45.838947726 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 205, + "throughput": 13.67, + "latency_mean": 73.52497626829268, + "latency_std": 1.0865490902318493, + "latency_50": 73.373698, + "latency_90": 74.60406520000001, + "latency_95": 74.979193, + "latency_99": 76.63692036, + "latency_999": 80.98970668399998 + }, + "optimized": { + "nb_forwards": 562, + "throughput": 37.47, + "latency_mean": 26.697691247330958, + "latency_std": 0.33379520623836, + "latency_50": 26.612126, + "latency_90": 27.0071515, + "latency_95": 27.35591295, + "latency_99": 27.96691773, + "latency_999": 28.784246988999993 + } + }, + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 210, + "throughput": 14.0, + "latency_mean": 71.64548409523809, + "latency_std": 0.9364269258806002, + "latency_50": 71.3511745, + "latency_90": 73.0493582, + "latency_95": 73.74027225, + "latency_99": 74.29445254000001, + "latency_999": 76.295386572 + }, + "optimized": { + "nb_forwards": 700, + "throughput": 46.67, + "latency_mean": 21.43839562, + "latency_std": 0.7747656036456344, + "latency_50": 21.458011, + "latency_90": 22.4099538, + "latency_95": 22.8598522, + "latency_99": 23.88133127, + "latency_999": 24.30084730600001 + } + }, + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 341, + "throughput": 22.73, + "latency_mean": 44.00805451906158, + "latency_std": 2.0146525121369048, + "latency_50": 43.40077, + "latency_90": 46.844243, + "latency_95": 48.01232, + "latency_99": 48.8285812, + "latency_999": 49.91079640000002 + }, + "optimized": { + "nb_forwards": 1191, + "throughput": 79.4, + "latency_mean": 12.596608948782535, + "latency_std": 0.4604972017819279, + "latency_50": 12.560364, + "latency_90": 13.254538, + "latency_95": 13.4481775, + "latency_99": 13.824474499999997, + "latency_999": 14.306386329999988 + } + }, + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 89, + "throughput": 5.93, + "latency_mean": 170.04430629213482, + "latency_std": 1.7973801840824346, + "latency_50": 169.804268, + "latency_90": 172.3942636, + "latency_95": 172.909571, + "latency_99": 174.41490236, + "latency_999": 174.83032853600002 + }, + "optimized": { + "nb_forwards": 184, + "throughput": 12.27, + "latency_mean": 81.67628805434782, + "latency_std": 2.2754557516244045, + "latency_50": 81.0874735, + "latency_90": 85.0994149, + "latency_95": 85.74545605, + "latency_99": 88.41751725, + "latency_999": 89.711957766 + } + }, + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 37, + "throughput": 2.47, + "latency_mean": 414.4216835675676, + "latency_std": 3.861346436570963, + "latency_50": 415.141617, + "latency_90": 419.931193, + "latency_95": 421.07318360000005, + "latency_99": 421.86215943999997, + "latency_999": 422.088350644 + }, + "optimized": { + "nb_forwards": 98, + "throughput": 6.53, + "latency_mean": 154.51860542857142, + "latency_std": 3.911988556596126, + "latency_50": 153.7217105, + "latency_90": 159.4319015, + "latency_95": 161.4452743, + "latency_99": 163.25851040999999, + "latency_999": 163.55056994100002 + } + } + ], + "others": { + "baseline": { + "precision": 0.9358012339503085, + "recall": 0.9444631437226523, + "f1": 0.9401122372057961, + "accuracy": 0.9882013940267124 + }, + "optimized": { + "precision": 0.9337560487235108, + "recall": 0.9417704476607203, + "f1": 0.9377461248428991, + "accuracy": 0.9878314707371209 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "distilbert" + }, + { + "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", + "task": "token-classification", + "task_args": null, + "dataset": { + "path": "conll2003", + "eval_split": "validation", + "data_keys": { + "primary": "tokens", + "secondary": null + }, + "ref_keys": [ + "ner_tags" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "dynamic", + "operators_to_quantize": [ + "Add" + ], + "node_exclusion": [ + "layernorm", + "gelu", + "residual", + "gather", + "softmax" + ], + "aware_training": false, + "per_channel": false, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3125.174\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 210, + "throughput": 14.0, + "latency_mean": 71.72416266666667, + "latency_std": 1.0823165199145606, + "latency_50": 71.4790375, + "latency_90": 72.9353617, + "latency_95": 73.99241959999999, + "latency_99": 75.17633138, + "latency_999": 75.422236178 + }, + "optimized": { + "nb_forwards": 272, + "throughput": 18.13, + "latency_mean": 55.160912452205885, + "latency_std": 0.6521145589906397, + "latency_50": 55.189684, + "latency_90": 55.7469555, + "latency_95": 55.9487205, + "latency_99": 57.71467353, + "latency_999": 58.06304184299999 + } + }, + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 159, + "throughput": 10.6, + "latency_mean": 94.34125967295597, + "latency_std": 10.03252637987446, + "latency_50": 91.050963, + "latency_90": 120.90638, + "latency_95": 121.39196709999999, + "latency_99": 122.02035808, + "latency_999": 122.622120378 + }, + "optimized": { + "nb_forwards": 141, + "throughput": 9.4, + "latency_mean": 107.02119182269503, + "latency_std": 0.8366529782537496, + "latency_50": 106.835229, + "latency_90": 108.249955, + "latency_95": 108.628056, + "latency_99": 109.2045378, + "latency_999": 110.26474030000001 + } + }, + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 248, + "throughput": 16.53, + "latency_mean": 60.49658227016129, + "latency_std": 0.8819758121225761, + "latency_50": 60.343132, + "latency_90": 61.6551285, + "latency_95": 62.34768465, + "latency_99": 63.31741764, + "latency_999": 63.487983883999995 + }, + "optimized": { + "nb_forwards": 503, + "throughput": 33.53, + "latency_mean": 29.869608675944335, + "latency_std": 0.3559872739101281, + "latency_50": 29.794297, + "latency_90": 30.1428484, + "latency_95": 30.4600473, + "latency_99": 30.75200356, + "latency_999": 33.756738917999996 + } + }, + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 445, + "throughput": 29.67, + "latency_mean": 33.73988259325843, + "latency_std": 1.5629102688800236, + "latency_50": 33.480773, + "latency_90": 35.338715799999996, + "latency_95": 35.7559136, + "latency_99": 36.873395439999996, + "latency_999": 47.8180268919997 + }, + "optimized": { + "nb_forwards": 1019, + "throughput": 67.93, + "latency_mean": 14.732223261040234, + "latency_std": 0.9828893965554253, + "latency_50": 14.401579, + "latency_90": 16.897523, + "latency_95": 17.0312649, + "latency_99": 17.46899586, + "latency_999": 17.893012501999998 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 210, + "throughput": 14.0, + "latency_mean": 71.59536255238095, + "latency_std": 1.13338439776916, + "latency_50": 71.154012, + "latency_90": 73.357109, + "latency_95": 73.92077855, + "latency_99": 74.25820067000001, + "latency_999": 75.003235419 + }, + "optimized": { + "nb_forwards": 282, + "throughput": 18.8, + "latency_mean": 53.25616293617021, + "latency_std": 4.987232229454594, + "latency_50": 56.9742995, + "latency_90": 57.4831075, + "latency_95": 58.0667945, + "latency_99": 59.20656624, + "latency_999": 62.057965914999976 + } + }, + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 48, + "throughput": 3.2, + "latency_mean": 314.12435689583333, + "latency_std": 4.446221158816441, + "latency_50": 314.1643195, + "latency_90": 319.1971656, + "latency_95": 322.43416895, + "latency_99": 326.67617025, + "latency_999": 327.501665325 + }, + "optimized": { + "nb_forwards": 47, + "throughput": 3.13, + "latency_mean": 323.93693336170213, + "latency_std": 6.869443185578473, + "latency_50": 323.040004, + "latency_90": 334.16514060000003, + "latency_95": 334.9778985, + "latency_99": 336.97889436, + "latency_999": 337.249830036 + } + }, + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 70, + "throughput": 4.67, + "latency_mean": 216.0601891, + "latency_std": 2.0959098467954616, + "latency_50": 215.485022, + "latency_90": 219.06993409999998, + "latency_95": 219.52531109999998, + "latency_99": 222.29498567000002, + "latency_999": 223.08344266699999 + }, + "optimized": { + "nb_forwards": 92, + "throughput": 6.13, + "latency_mean": 163.75156455434782, + "latency_std": 7.2184659324399, + "latency_50": 161.7846625, + "latency_90": 176.00411860000003, + "latency_95": 179.22768994999998, + "latency_99": 183.41052014000002, + "latency_999": 188.60359111400004 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 165, + "throughput": 11.0, + "latency_mean": 91.31757051515152, + "latency_std": 2.4793130848747467, + "latency_50": 91.036743, + "latency_90": 93.733273, + "latency_95": 95.67535299999999, + "latency_99": 100.79730819999997, + "latency_999": 102.770634928 + }, + "optimized": { + "nb_forwards": 163, + "throughput": 10.87, + "latency_mean": 92.08470889570552, + "latency_std": 11.432354704165576, + "latency_50": 84.815059, + "latency_90": 105.0187196, + "latency_95": 105.2827365, + "latency_99": 106.12717995999999, + "latency_999": 106.21855146600001 + } + }, + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 80, + "throughput": 5.33, + "latency_mean": 188.59378125, + "latency_std": 22.91815791529492, + "latency_50": 172.9137385, + "latency_90": 219.50391, + "latency_95": 220.21657405000002, + "latency_99": 221.94066489, + "latency_999": 223.095406389 + }, + "optimized": { + "nb_forwards": 88, + "throughput": 5.87, + "latency_mean": 171.786525, + "latency_std": 7.080702021982688, + "latency_50": 170.577715, + "latency_90": 184.1455852, + "latency_95": 186.60128294999998, + "latency_99": 190.34417446, + "latency_999": 190.786445746 + } + } + ], + "others": { + "baseline": { + "precision": 0.9358012339503085, + "recall": 0.9444631437226523, + "f1": 0.9401122372057961, + "accuracy": 0.9882013940267124 + }, + "optimized": { + "precision": 0.9358012339503085, + "recall": 0.9444631437226523, + "f1": 0.9401122372057961, + "accuracy": 0.9882013940267124 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "distilbert" + }, + { + "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", + "task": "token-classification", + "task_args": null, + "dataset": { + "path": "conll2003", + "eval_split": "validation", + "data_keys": { + "primary": "tokens", + "secondary": null + }, + "ref_keys": [ + "ner_tags" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "dynamic", + "operators_to_quantize": [ + "Add" + ], + "node_exclusion": [], + "aware_training": false, + "per_channel": false, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3102.480\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 37, + "throughput": 2.47, + "latency_mean": 408.1543652972973, + "latency_std": 3.7417217521654447, + "latency_50": 408.204875, + "latency_90": 412.71899360000003, + "latency_95": 415.5526328, + "latency_99": 417.56349056, + "latency_999": 418.43689865600004 + }, + "optimized": { + "nb_forwards": 47, + "throughput": 3.13, + "latency_mean": 325.0274505106383, + "latency_std": 16.42866903352087, + "latency_50": 323.280477, + "latency_90": 336.7398878, + "latency_95": 340.73717709999994, + "latency_99": 392.46748674, + "latency_999": 395.68078127399997 + } + }, + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 270, + "throughput": 18.0, + "latency_mean": 55.55779645555556, + "latency_std": 1.1666476240332937, + "latency_50": 55.618944, + "latency_90": 57.023193799999994, + "latency_95": 57.38550095, + "latency_99": 58.569932480000006, + "latency_999": 59.960250984 + }, + "optimized": { + "nb_forwards": 342, + "throughput": 22.8, + "latency_mean": 43.866120190058474, + "latency_std": 1.226638661359772, + "latency_50": 43.698571, + "latency_90": 45.555251, + "latency_95": 46.4943021, + "latency_99": 47.907215019999995, + "latency_999": 48.302261889 + } + }, + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 126, + "throughput": 8.4, + "latency_mean": 119.10938425396826, + "latency_std": 5.242609662991225, + "latency_50": 119.8814875, + "latency_90": 122.013498, + "latency_95": 122.8530875, + "latency_99": 124.503561, + "latency_999": 125.157123625 + }, + "optimized": { + "nb_forwards": 182, + "throughput": 12.13, + "latency_mean": 82.46068521428572, + "latency_std": 1.8851982998377101, + "latency_50": 82.4162485, + "latency_90": 84.9214402, + "latency_95": 85.51261295, + "latency_99": 87.40860171, + "latency_999": 88.40375641499999 + } + }, + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 448, + "throughput": 29.87, + "latency_mean": 33.48908037053571, + "latency_std": 1.1547087313019704, + "latency_50": 33.380541, + "latency_90": 35.0732339, + "latency_95": 35.621661700000004, + "latency_99": 36.54991294999999, + "latency_999": 37.980162713999995 + }, + "optimized": { + "nb_forwards": 1059, + "throughput": 70.6, + "latency_mean": 14.167183152974506, + "latency_std": 0.3567793828104339, + "latency_50": 14.113569, + "latency_90": 14.613918199999999, + "latency_95": 14.760972, + "latency_99": 15.203892, + "latency_999": 15.846131796 + } + }, + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 69, + "throughput": 4.6, + "latency_mean": 219.8001221014493, + "latency_std": 1.682059211691071, + "latency_50": 219.207299, + "latency_90": 222.769191, + "latency_95": 223.179612, + "latency_99": 224.12751892, + "latency_999": 225.31481789199998 + }, + "optimized": { + "nb_forwards": 92, + "throughput": 6.13, + "latency_mean": 163.62187851086955, + "latency_std": 3.9886368861391612, + "latency_50": 163.256997, + "latency_90": 168.68158590000002, + "latency_95": 170.24915575, + "latency_99": 173.29093638, + "latency_999": 175.298208738 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 214, + "throughput": 14.27, + "latency_mean": 70.39102674299066, + "latency_std": 0.645440942501749, + "latency_50": 70.2169265, + "latency_90": 71.08614279999999, + "latency_95": 71.45404645, + "latency_99": 73.1100614, + "latency_999": 74.05629770400002 + }, + "optimized": { + "nb_forwards": 265, + "throughput": 17.67, + "latency_mean": 56.682904645283024, + "latency_std": 0.29074631764368225, + "latency_50": 56.604756, + "latency_90": 56.9511692, + "latency_95": 57.2699554, + "latency_99": 58.03212468, + "latency_999": 58.197217128 + } + }, + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 85, + "throughput": 5.67, + "latency_mean": 176.69271157647057, + "latency_std": 19.225586228496905, + "latency_50": 166.86136, + "latency_90": 213.846849, + "latency_95": 214.451406, + "latency_99": 216.24402419999998, + "latency_999": 217.66288122 + }, + "optimized": { + "nb_forwards": 72, + "throughput": 4.8, + "latency_mean": 209.2788515277778, + "latency_std": 1.0661208330184972, + "latency_50": 209.1019465, + "latency_90": 210.5121405, + "latency_95": 211.63171434999998, + "latency_99": 212.82214144, + "latency_999": 212.860057144 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 170, + "throughput": 11.33, + "latency_mean": 88.32521131764706, + "latency_std": 1.6082421836618368, + "latency_50": 88.34603, + "latency_90": 90.24364059999999, + "latency_95": 90.82843125, + "latency_99": 92.49519597, + "latency_999": 94.140928359 + }, + "optimized": { + "nb_forwards": 160, + "throughput": 10.67, + "latency_mean": 94.18205897499999, + "latency_std": 11.564821917485974, + "latency_50": 103.343016, + "latency_90": 104.0051318, + "latency_95": 104.4116828, + "latency_99": 105.75135414, + "latency_999": 106.152972641 + } + }, + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 331, + "throughput": 22.07, + "latency_mean": 45.34665541087613, + "latency_std": 4.816947937501302, + "latency_50": 43.877047, + "latency_90": 48.211528, + "latency_95": 59.708112, + "latency_99": 60.144242399999996, + "latency_999": 60.57435263 + }, + "optimized": { + "nb_forwards": 623, + "throughput": 41.53, + "latency_mean": 24.101585462279292, + "latency_std": 0.490914928207836, + "latency_50": 24.033518, + "latency_90": 24.7787182, + "latency_95": 25.0034624, + "latency_99": 25.4011857, + "latency_999": 25.923231378000025 + } + } + ], + "others": { + "baseline": { + "precision": 0.9358012339503085, + "recall": 0.9444631437226523, + "f1": 0.9401122372057961, + "accuracy": 0.9882013940267124 + }, + "optimized": { + "precision": 0.9358012339503085, + "recall": 0.9444631437226523, + "f1": 0.9401122372057961, + "accuracy": 0.9882013940267124 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "distilbert" + }, + { + "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", + "task": "token-classification", + "task_args": null, + "dataset": { + "path": "conll2003", + "eval_split": "validation", + "data_keys": { + "primary": "tokens", + "secondary": null + }, + "ref_keys": [ + "ner_tags" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "static", + "operators_to_quantize": [ + "Add" + ], + "node_exclusion": [ + "layernorm", + "gelu", + "residual", + "gather", + "softmax" + ], + "aware_training": false, + "per_channel": false, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3099.991\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 348, + "throughput": 23.2, + "latency_mean": 43.15195731609195, + "latency_std": 1.3049791783770857, + "latency_50": 43.026682, + "latency_90": 44.93291970000001, + "latency_95": 45.4691658, + "latency_99": 46.60123, + "latency_999": 47.391866047 + }, + "optimized": { + "nb_forwards": 456, + "throughput": 30.4, + "latency_mean": 32.961429899122805, + "latency_std": 0.22594982626954999, + "latency_50": 32.925405, + "latency_90": 33.210001, + "latency_95": 33.29684425, + "latency_99": 33.561622449999994, + "latency_999": 34.45556356 + } + }, + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 328, + "throughput": 21.87, + "latency_mean": 45.769709847560975, + "latency_std": 5.105158395405133, + "latency_50": 47.564638, + "latency_90": 49.1212201, + "latency_95": 49.496884, + "latency_99": 50.38536208, + "latency_999": 50.559775634000005 + }, + "optimized": { + "nb_forwards": 790, + "throughput": 52.67, + "latency_mean": 18.99811082658228, + "latency_std": 0.18571276934069156, + "latency_50": 18.955887, + "latency_90": 19.2610981, + "latency_95": 19.371919350000002, + "latency_99": 19.535236910000002, + "latency_999": 19.715454875 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 214, + "throughput": 14.27, + "latency_mean": 70.2763914906542, + "latency_std": 0.2860076644639914, + "latency_50": 70.2829375, + "latency_90": 70.6419829, + "latency_95": 70.73166590000001, + "latency_99": 70.99700557, + "latency_999": 71.13112393099999 + }, + "optimized": { + "nb_forwards": 240, + "throughput": 16.0, + "latency_mean": 62.699298579166665, + "latency_std": 0.3115197355843105, + "latency_50": 62.624987, + "latency_90": 63.0751223, + "latency_95": 63.22834435, + "latency_99": 63.65015018, + "latency_999": 64.543749036 + } + }, + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 71, + "throughput": 4.73, + "latency_mean": 213.8736818169014, + "latency_std": 1.581524798477253, + "latency_50": 213.656346, + "latency_90": 214.2873, + "latency_95": 214.4610125, + "latency_99": 218.50125879999996, + "latency_999": 225.79765168000006 + }, + "optimized": { + "nb_forwards": 67, + "throughput": 4.47, + "latency_mean": 224.49603488059702, + "latency_std": 14.179003233192402, + "latency_50": 228.344384, + "latency_90": 228.9939548, + "latency_95": 229.2531382, + "latency_99": 229.51218631999998, + "latency_999": 229.879069232 + } + }, + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 126, + "throughput": 8.4, + "latency_mean": 119.57019325396826, + "latency_std": 0.9318178327933169, + "latency_50": 119.624132, + "latency_90": 120.7031565, + "latency_95": 120.95046825, + "latency_99": 121.62923875, + "latency_999": 122.22802 + }, + "optimized": { + "nb_forwards": 163, + "throughput": 10.87, + "latency_mean": 92.49704997546013, + "latency_std": 3.5224258875712082, + "latency_50": 91.538022, + "latency_90": 98.7628616, + "latency_95": 99.7127255, + "latency_99": 101.79093066, + "latency_999": 103.67060976999998 + } + }, + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 85, + "throughput": 5.67, + "latency_mean": 177.4090096117647, + "latency_std": 18.424368744880013, + "latency_50": 169.497669, + "latency_90": 219.17437120000002, + "latency_95": 221.619667, + "latency_99": 223.05841704, + "latency_999": 223.330345704 + }, + "optimized": { + "nb_forwards": 71, + "throughput": 4.73, + "latency_mean": 211.88950738028169, + "latency_std": 29.120931744682288, + "latency_50": 237.172705, + "latency_90": 238.463971, + "latency_95": 238.7496675, + "latency_99": 239.3086489, + "latency_999": 239.40512899 + } + }, + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 272, + "throughput": 18.13, + "latency_mean": 55.23031008455882, + "latency_std": 1.1571386368235503, + "latency_50": 55.2722705, + "latency_90": 56.607078200000004, + "latency_95": 57.48416465, + "latency_99": 58.30250327, + "latency_999": 58.528941114 + }, + "optimized": { + "nb_forwards": 253, + "throughput": 16.87, + "latency_mean": 59.30528993675889, + "latency_std": 0.2044975324140483, + "latency_50": 59.279888, + "latency_90": 59.5549104, + "latency_95": 59.60868, + "latency_99": 59.898025759999996, + "latency_999": 60.518632308 + } + }, + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 37, + "throughput": 2.47, + "latency_mean": 409.6796750540541, + "latency_std": 3.4623663344659903, + "latency_50": 409.339737, + "latency_90": 413.77022339999996, + "latency_95": 416.1084598, + "latency_99": 419.1264798, + "latency_999": 419.41464377999995 + }, + "optimized": { + "nb_forwards": 31, + "throughput": 2.07, + "latency_mean": 493.00366906451615, + "latency_std": 3.929536871101732, + "latency_50": 493.144742, + "latency_90": 498.634207, + "latency_95": 498.8949985, + "latency_99": 499.6773647, + "latency_999": 499.91265107 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 173, + "throughput": 11.53, + "latency_mean": 87.05498305780347, + "latency_std": 1.5424139654073576, + "latency_50": 86.877938, + "latency_90": 88.887705, + "latency_95": 89.4437458, + "latency_99": 91.18471404, + "latency_999": 92.01616860000001 + }, + "optimized": { + "nb_forwards": 135, + "throughput": 9.0, + "latency_mean": 111.36274993333333, + "latency_std": 9.414259569194291, + "latency_50": 115.101582, + "latency_90": 116.2764454, + "latency_95": 116.6387619, + "latency_99": 116.93107282, + "latency_999": 117.27618652400001 + } + } + ], + "others": { + "baseline": { + "precision": 0.9358012339503085, + "recall": 0.9444631437226523, + "f1": 0.9401122372057961, + "accuracy": 0.9882013940267124 + }, + "optimized": { + "precision": 0.9087171052631579, + "recall": 0.929821608885897, + "f1": 0.9191482282482116, + "accuracy": 0.9856898095868541 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "distilbert" + }, + { + "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", + "task": "token-classification", + "task_args": null, + "dataset": { + "path": "conll2003", + "eval_split": "validation", + "data_keys": { + "primary": "tokens", + "secondary": null + }, + "ref_keys": [ + "ner_tags" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "static", + "operators_to_quantize": [ + "Add" + ], + "node_exclusion": [], + "aware_training": false, + "per_channel": false, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3119.116\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 90, + "throughput": 6.0, + "latency_mean": 168.29981604444444, + "latency_std": 3.0980387809618484, + "latency_50": 167.5409385, + "latency_90": 170.98883170000002, + "latency_95": 174.5708666, + "latency_99": 179.67467568, + "latency_999": 183.294064668 + }, + "optimized": { + "nb_forwards": 75, + "throughput": 5.0, + "latency_mean": 201.88371818666667, + "latency_std": 5.1702570236285155, + "latency_50": 201.088276, + "latency_90": 208.22007219999998, + "latency_95": 211.4593359, + "latency_99": 214.37736256000002, + "latency_999": 218.26520815600003 + } + }, + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 128, + "throughput": 8.53, + "latency_mean": 117.345409171875, + "latency_std": 1.0232306892142995, + "latency_50": 117.570859, + "latency_90": 118.4811602, + "latency_95": 118.66072285, + "latency_99": 119.97331652, + "latency_999": 120.173632319 + }, + "optimized": { + "nb_forwards": 147, + "throughput": 9.8, + "latency_mean": 102.08866921088435, + "latency_std": 4.207024690408841, + "latency_50": 100.900148, + "latency_90": 106.6952282, + "latency_95": 111.67752269999998, + "latency_99": 115.24070447999998, + "latency_999": 122.8603576240001 + } + }, + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 256, + "throughput": 17.07, + "latency_mean": 58.79949217578125, + "latency_std": 7.2389593462092225, + "latency_50": 55.00797, + "latency_90": 71.60057, + "latency_95": 72.5113045, + "latency_99": 72.87857595, + "latency_999": 73.62029899000001 + }, + "optimized": { + "nb_forwards": 228, + "throughput": 15.2, + "latency_mean": 66.03353574122808, + "latency_std": 0.2155799936674679, + "latency_50": 66.018175, + "latency_90": 66.2633382, + "latency_95": 66.3559738, + "latency_99": 66.66150048, + "latency_999": 67.423175391 + } + }, + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 342, + "throughput": 22.8, + "latency_mean": 44.01321974853801, + "latency_std": 4.990156500118981, + "latency_50": 42.3942795, + "latency_90": 56.7239391, + "latency_95": 57.8124011, + "latency_99": 58.51948686, + "latency_999": 59.132265571 + }, + "optimized": { + "nb_forwards": 474, + "throughput": 31.6, + "latency_mean": 31.67708501898734, + "latency_std": 1.0256274074298153, + "latency_50": 31.644259, + "latency_90": 33.042136, + "latency_95": 33.4474054, + "latency_99": 34.71017928, + "latency_999": 35.40770895199999 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 129, + "throughput": 8.6, + "latency_mean": 116.96203879069768, + "latency_std": 0.9805415537137262, + "latency_50": 116.711745, + "latency_90": 117.476479, + "latency_95": 119.6996568, + "latency_99": 120.4817116, + "latency_999": 120.693500216 + }, + "optimized": { + "nb_forwards": 152, + "throughput": 10.13, + "latency_mean": 98.81530088157895, + "latency_std": 2.408744102355572, + "latency_50": 98.4953865, + "latency_90": 101.9166918, + "latency_95": 103.45597070000001, + "latency_99": 105.89102432000001, + "latency_999": 107.434858943 + } + }, + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 72, + "throughput": 4.8, + "latency_mean": 211.162219375, + "latency_std": 0.50332579026598, + "latency_50": 211.161418, + "latency_90": 211.835143, + "latency_95": 211.9189473, + "latency_99": 212.26486029, + "latency_999": 212.469340929 + }, + "optimized": { + "nb_forwards": 78, + "throughput": 5.2, + "latency_mean": 193.01161867948716, + "latency_std": 4.477161349632226, + "latency_50": 193.023931, + "latency_90": 198.0193413, + "latency_95": 200.14843679999998, + "latency_99": 206.14731525000002, + "latency_999": 211.593862125 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 235, + "throughput": 15.67, + "latency_mean": 63.85546833191489, + "latency_std": 6.973859157506543, + "latency_50": 69.383357, + "latency_90": 70.026538, + "latency_95": 70.3286013, + "latency_99": 70.6866161, + "latency_999": 71.185565742 + }, + "optimized": { + "nb_forwards": 244, + "throughput": 16.27, + "latency_mean": 61.635230135245905, + "latency_std": 5.810202171663617, + "latency_50": 58.524244, + "latency_90": 70.1898687, + "latency_95": 70.36230454999999, + "latency_99": 70.64778987000001, + "latency_999": 72.10732932400002 + } + }, + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 38, + "throughput": 2.53, + "latency_mean": 397.28297392105264, + "latency_std": 0.6990720798085198, + "latency_50": 397.169161, + "latency_90": 398.30781060000004, + "latency_95": 398.40529275, + "latency_99": 398.80296562, + "latency_999": 398.940152962 + }, + "optimized": { + "nb_forwards": 38, + "throughput": 2.53, + "latency_mean": 397.74469242105266, + "latency_std": 9.112867850853375, + "latency_50": 396.410306, + "latency_90": 411.3193268, + "latency_95": 412.24714525, + "latency_99": 415.15970197, + "latency_999": 416.37122889700004 + } + }, + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 336, + "throughput": 22.4, + "latency_mean": 44.668419485119045, + "latency_std": 4.092836734909812, + "latency_50": 46.0771455, + "latency_90": 46.8151985, + "latency_95": 47.12449375, + "latency_99": 47.4169311, + "latency_999": 47.675663060000005 + }, + "optimized": { + "nb_forwards": 800, + "throughput": 53.33, + "latency_mean": 18.77007987125, + "latency_std": 1.3253872815877223, + "latency_50": 18.32753, + "latency_90": 21.5042558, + "latency_95": 21.83777205, + "latency_99": 22.11860088, + "latency_999": 22.523989289000014 + } + } + ], + "others": { + "baseline": { + "precision": 0.9358012339503085, + "recall": 0.9444631437226523, + "f1": 0.9401122372057961, + "accuracy": 0.9882013940267124 + }, + "optimized": { + "precision": 0.05001838911364472, + "recall": 0.16021541568495457, + "f1": 0.07623623623623622, + "accuracy": 0.31141700089560376 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "distilbert" + }, + { + "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", + "task": "token-classification", + "task_args": null, + "dataset": { + "path": "conll2003", + "eval_split": "validation", + "data_keys": { + "primary": "tokens", + "secondary": null + }, + "ref_keys": [ + "ner_tags" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "dynamic", + "operators_to_quantize": [ + "Add", + "MatMul" + ], + "node_exclusion": [ + "layernorm", + "gelu", + "residual", + "gather", + "softmax" + ], + "aware_training": false, + "per_channel": false, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3170.168\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 456, + "throughput": 30.4, + "latency_mean": 32.90015813157895, + "latency_std": 0.9986947055465656, + "latency_50": 32.8186695, + "latency_90": 34.223269, + "latency_95": 34.7373955, + "latency_99": 35.79150405, + "latency_999": 37.31698694000001 + }, + "optimized": { + "nb_forwards": 2133, + "throughput": 142.2, + "latency_mean": 7.03493227238631, + "latency_std": 0.35354984774223724, + "latency_50": 6.950204, + "latency_90": 7.5335166, + "latency_95": 7.5981322, + "latency_99": 7.7467418, + "latency_999": 7.949874583999998 + } + }, + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 128, + "throughput": 8.53, + "latency_mean": 117.711207453125, + "latency_std": 0.3039802910847197, + "latency_50": 117.713083, + "latency_90": 118.1415121, + "latency_95": 118.31928495, + "latency_99": 118.40991929, + "latency_999": 118.45796044400001 + }, + "optimized": { + "nb_forwards": 342, + "throughput": 22.8, + "latency_mean": 43.9346932251462, + "latency_std": 1.9823168868846608, + "latency_50": 44.48181, + "latency_90": 44.9406133, + "latency_95": 45.33209005, + "latency_99": 46.44872295999999, + "latency_999": 47.516010738999995 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 142, + "throughput": 9.47, + "latency_mean": 106.45585029577465, + "latency_std": 13.710361788824905, + "latency_50": 115.6649905, + "latency_90": 116.2565268, + "latency_95": 116.41340004999999, + "latency_99": 116.79343428, + "latency_999": 119.25626965200003 + }, + "optimized": { + "nb_forwards": 355, + "throughput": 23.67, + "latency_mean": 42.35345902253521, + "latency_std": 0.16699971956793372, + "latency_50": 42.330536, + "latency_90": 42.578397200000005, + "latency_95": 42.637379, + "latency_99": 42.80914288, + "latency_999": 43.008794852 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 272, + "throughput": 18.13, + "latency_mean": 55.20024977941176, + "latency_std": 0.9356174611392704, + "latency_50": 55.154456, + "latency_90": 56.3394606, + "latency_95": 56.87050635, + "latency_99": 57.61698746000001, + "latency_999": 58.883503068999985 + }, + "optimized": { + "nb_forwards": 584, + "throughput": 38.93, + "latency_mean": 25.724440731164385, + "latency_std": 0.1774167175481294, + "latency_50": 25.694553, + "latency_90": 25.9448093, + "latency_95": 26.0568756, + "latency_99": 26.254885369999997, + "latency_999": 26.525804783000005 + } + }, + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 214, + "throughput": 14.27, + "latency_mean": 70.41022843925234, + "latency_std": 0.9857357168050909, + "latency_50": 70.4451575, + "latency_90": 71.6293733, + "latency_95": 71.97065065000001, + "latency_99": 72.92320712, + "latency_999": 73.059142452 + }, + "optimized": { + "nb_forwards": 662, + "throughput": 44.13, + "latency_mean": 22.66916344410876, + "latency_std": 0.1531962446356845, + "latency_50": 22.644899, + "latency_90": 22.882265699999998, + "latency_95": 22.93566205, + "latency_99": 23.106536690000002, + "latency_999": 23.233784632 + } + }, + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 68, + "throughput": 4.53, + "latency_mean": 220.69089520588233, + "latency_std": 2.3715499250111147, + "latency_50": 220.928956, + "latency_90": 223.564325, + "latency_95": 224.24846325, + "latency_99": 225.59921588999998, + "latency_999": 225.913941489 + }, + "optimized": { + "nb_forwards": 160, + "throughput": 10.67, + "latency_mean": 94.33282233125, + "latency_std": 1.3440036091128054, + "latency_50": 94.0403795, + "latency_90": 96.10274629999999, + "latency_95": 96.99199729999998, + "latency_99": 98.02360016, + "latency_999": 99.111505656 + } + }, + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 91, + "throughput": 6.07, + "latency_mean": 165.66658007692308, + "latency_std": 9.200627894933877, + "latency_50": 163.47137, + "latency_90": 169.773734, + "latency_95": 172.0753955, + "latency_99": 213.2899759, + "latency_999": 214.50856338999998 + }, + "optimized": { + "nb_forwards": 172, + "throughput": 11.47, + "latency_mean": 87.71301163953488, + "latency_std": 1.0979033087486965, + "latency_50": 87.4641435, + "latency_90": 89.3678849, + "latency_95": 89.7342365, + "latency_99": 90.56987919, + "latency_999": 90.839936162 + } + }, + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 39, + "throughput": 2.6, + "latency_mean": 391.16164482051283, + "latency_std": 24.77584642803748, + "latency_50": 397.925266, + "latency_90": 401.79618239999996, + "latency_95": 402.9476371, + "latency_99": 403.36617936, + "latency_999": 403.412153736 + }, + "optimized": { + "nb_forwards": 82, + "throughput": 5.47, + "latency_mean": 183.34596223170732, + "latency_std": 1.9414583808520627, + "latency_50": 182.9643005, + "latency_90": 185.9883817, + "latency_95": 187.12977195, + "latency_99": 188.01645273, + "latency_999": 188.707885173 + } + }, + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 254, + "throughput": 16.93, + "latency_mean": 59.14541264566929, + "latency_std": 1.0597184999316425, + "latency_50": 59.1676845, + "latency_90": 60.564947700000005, + "latency_95": 60.97932805, + "latency_99": 61.52093252, + "latency_999": 61.899637246 + }, + "optimized": { + "nb_forwards": 1103, + "throughput": 73.53, + "latency_mean": 13.600328429737079, + "latency_std": 0.19450611359009803, + "latency_50": 13.567231, + "latency_90": 13.8727804, + "latency_95": 14.010098300000001, + "latency_99": 14.15573608, + "latency_999": 14.268220878 + } + } + ], + "others": { + "baseline": { + "precision": 0.9358012339503085, + "recall": 0.9444631437226523, + "f1": 0.9401122372057961, + "accuracy": 0.9882013940267124 + }, + "optimized": { + "precision": 0.9337560487235108, + "recall": 0.9417704476607203, + "f1": 0.9377461248428991, + "accuracy": 0.9878314707371209 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "distilbert" + } +] \ No newline at end of file