|
[ |
|
{ |
|
"model_name_or_path": "distilbert-base-uncased-distilled-squad", |
|
"task": "question-answering", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "squad", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"question": "question", |
|
"context": "context" |
|
}, |
|
"ref_keys": [ |
|
"answers" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "static", |
|
"operators_to_quantize": [ |
|
"Add" |
|
], |
|
"node_exclusion": [ |
|
"layernorm", |
|
"gelu", |
|
"residual", |
|
"gather", |
|
"softmax" |
|
], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.278\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 126, |
|
"throughput": 8.4, |
|
"latency_mean": 119.09526233333332, |
|
"latency_std": 0.7552068299080471, |
|
"latency_50": 119.159921, |
|
"latency_90": 119.811235, |
|
"latency_95": 120.0762565, |
|
"latency_99": 121.2652685, |
|
"latency_999": 121.44601075 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 132, |
|
"throughput": 8.8, |
|
"latency_mean": 114.4251281590909, |
|
"latency_std": 0.5121636382229513, |
|
"latency_50": 114.325617, |
|
"latency_90": 114.8843064, |
|
"latency_95": 115.2926259, |
|
"latency_99": 116.21156513, |
|
"latency_999": 116.902565364 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 440, |
|
"throughput": 29.33, |
|
"latency_mean": 34.11132023409091, |
|
"latency_std": 1.1428837223428832, |
|
"latency_50": 34.0117225, |
|
"latency_90": 35.7673362, |
|
"latency_95": 36.214977399999995, |
|
"latency_99": 36.84041992, |
|
"latency_999": 37.28137764399999 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 780, |
|
"throughput": 52.0, |
|
"latency_mean": 19.232586415384617, |
|
"latency_std": 0.19048831587735654, |
|
"latency_50": 19.205587, |
|
"latency_90": 19.3554198, |
|
"latency_95": 19.4416857, |
|
"latency_99": 19.747647200000003, |
|
"latency_999": 21.31330125800001 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 91, |
|
"throughput": 6.07, |
|
"latency_mean": 166.1344919010989, |
|
"latency_std": 4.663006799959188, |
|
"latency_50": 165.748607, |
|
"latency_90": 167.573247, |
|
"latency_95": 168.185849, |
|
"latency_99": 176.2619040999998, |
|
"latency_999": 204.17836050999986 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 68, |
|
"throughput": 4.53, |
|
"latency_mean": 220.92038330882352, |
|
"latency_std": 17.990154740643728, |
|
"latency_50": 229.036164, |
|
"latency_90": 230.3515418, |
|
"latency_95": 230.71870145, |
|
"latency_99": 233.38270184, |
|
"latency_999": 233.602886084 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 69, |
|
"throughput": 4.6, |
|
"latency_mean": 220.57769956521742, |
|
"latency_std": 0.6686484290829855, |
|
"latency_50": 220.51593, |
|
"latency_90": 221.12682819999998, |
|
"latency_95": 221.66818940000002, |
|
"latency_99": 223.12707279999998, |
|
"latency_999": 223.50554278 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 73, |
|
"throughput": 4.87, |
|
"latency_mean": 207.50999308219178, |
|
"latency_std": 27.815623148152877, |
|
"latency_50": 189.774877, |
|
"latency_90": 237.9391546, |
|
"latency_95": 238.31236859999999, |
|
"latency_99": 240.56186224, |
|
"latency_999": 244.546144024 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 347, |
|
"throughput": 23.13, |
|
"latency_mean": 43.31934634870317, |
|
"latency_std": 1.0549048838002049, |
|
"latency_50": 43.200915, |
|
"latency_90": 44.882578200000005, |
|
"latency_95": 45.2033363, |
|
"latency_99": 46.21059290000001, |
|
"latency_999": 46.872020362 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 536, |
|
"throughput": 35.73, |
|
"latency_mean": 27.99766281716418, |
|
"latency_std": 1.0529425569523703, |
|
"latency_50": 27.69748, |
|
"latency_90": 29.514874, |
|
"latency_95": 30.11867, |
|
"latency_99": 30.962504349999996, |
|
"latency_999": 31.912116325000028 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 166, |
|
"throughput": 11.07, |
|
"latency_mean": 90.84950987349397, |
|
"latency_std": 2.0682139308506917, |
|
"latency_50": 90.8146185, |
|
"latency_90": 93.020294, |
|
"latency_95": 93.80372125, |
|
"latency_99": 96.35470339999999, |
|
"latency_999": 102.08768143500005 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 136, |
|
"throughput": 9.07, |
|
"latency_mean": 110.460723375, |
|
"latency_std": 10.94678012157101, |
|
"latency_50": 116.622233, |
|
"latency_90": 117.13783, |
|
"latency_95": 117.34992975, |
|
"latency_99": 117.67875504999999, |
|
"latency_999": 117.831356625 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 49, |
|
"throughput": 3.27, |
|
"latency_mean": 309.91095881632657, |
|
"latency_std": 3.608767284403935, |
|
"latency_50": 310.015937, |
|
"latency_90": 313.87800439999995, |
|
"latency_95": 314.33423439999996, |
|
"latency_99": 318.4142126, |
|
"latency_999": 321.07916876 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 42, |
|
"throughput": 2.8, |
|
"latency_mean": 357.87234047619046, |
|
"latency_std": 12.591861806274563, |
|
"latency_50": 355.861331, |
|
"latency_90": 371.2001084, |
|
"latency_95": 392.5037146499999, |
|
"latency_99": 396.58451314, |
|
"latency_999": 398.517758014 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 261, |
|
"throughput": 17.4, |
|
"latency_mean": 57.65446063601532, |
|
"latency_std": 2.434519247994199, |
|
"latency_50": 56.976535, |
|
"latency_90": 60.541172, |
|
"latency_95": 61.207556, |
|
"latency_99": 68.33251279999983, |
|
"latency_999": 73.13234924 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 259, |
|
"throughput": 17.27, |
|
"latency_mean": 57.94760827799228, |
|
"latency_std": 5.454128073391148, |
|
"latency_50": 62.715428, |
|
"latency_90": 63.1070498, |
|
"latency_95": 63.2122011, |
|
"latency_99": 63.43313314, |
|
"latency_999": 63.689983864000006 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 241, |
|
"throughput": 16.07, |
|
"latency_mean": 62.38362809958507, |
|
"latency_std": 6.7586580892753, |
|
"latency_50": 58.222043, |
|
"latency_90": 69.869798, |
|
"latency_95": 70.271116, |
|
"latency_99": 71.7271214, |
|
"latency_999": 80.2065777999999 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 280, |
|
"throughput": 18.67, |
|
"latency_mean": 53.64114180357142, |
|
"latency_std": 5.084170546990271, |
|
"latency_50": 50.8657445, |
|
"latency_90": 59.753021700000005, |
|
"latency_95": 59.83479735, |
|
"latency_99": 59.98286641, |
|
"latency_999": 60.134200166 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"exact_match": 82.3, |
|
"f1": 87.2318519258519 |
|
}, |
|
"optimized": { |
|
"exact_match": 76.9, |
|
"f1": 83.01425661180923 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": 1000, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
}, |
|
{ |
|
"model_name_or_path": "distilbert-base-uncased-distilled-squad", |
|
"task": "question-answering", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "squad", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"question": "question", |
|
"context": "context" |
|
}, |
|
"ref_keys": [ |
|
"answers" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add", |
|
"MatMul" |
|
], |
|
"node_exclusion": [ |
|
"layernorm", |
|
"gelu", |
|
"residual", |
|
"gather", |
|
"softmax" |
|
], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3099.977\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 205, |
|
"throughput": 13.67, |
|
"latency_mean": 73.39433734146341, |
|
"latency_std": 1.0224081759969428, |
|
"latency_50": 73.460104, |
|
"latency_90": 74.518562, |
|
"latency_95": 75.0664468, |
|
"latency_99": 75.84203632, |
|
"latency_999": 76.921990512 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 565, |
|
"throughput": 37.67, |
|
"latency_mean": 26.564435938053098, |
|
"latency_std": 0.187471490836448, |
|
"latency_50": 26.533942, |
|
"latency_90": 26.71886, |
|
"latency_95": 26.8078146, |
|
"latency_99": 27.12104648, |
|
"latency_999": 28.311193836000076 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 88, |
|
"throughput": 5.87, |
|
"latency_mean": 172.14481681818182, |
|
"latency_std": 1.6925453073042733, |
|
"latency_50": 172.1907685, |
|
"latency_90": 174.1415275, |
|
"latency_95": 175.0375522, |
|
"latency_99": 175.82339321, |
|
"latency_999": 175.940543321 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 159, |
|
"throughput": 10.6, |
|
"latency_mean": 94.77951160377359, |
|
"latency_std": 4.814673654949999, |
|
"latency_50": 96.147419, |
|
"latency_90": 97.289479, |
|
"latency_95": 97.7011618, |
|
"latency_99": 98.90126592, |
|
"latency_999": 99.86395080400003 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 314, |
|
"throughput": 20.93, |
|
"latency_mean": 47.86938606687898, |
|
"latency_std": 0.5732448823818431, |
|
"latency_50": 47.92357, |
|
"latency_90": 48.562472299999996, |
|
"latency_95": 48.774001, |
|
"latency_99": 49.11157929, |
|
"latency_999": 49.426655882 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 2076, |
|
"throughput": 138.4, |
|
"latency_mean": 7.228697234585741, |
|
"latency_std": 0.31178062817765007, |
|
"latency_50": 7.1645175, |
|
"latency_90": 7.7391565, |
|
"latency_95": 7.850395, |
|
"latency_99": 7.99090625, |
|
"latency_999": 8.272048450000009 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 124, |
|
"throughput": 8.27, |
|
"latency_mean": 121.20055792741935, |
|
"latency_std": 0.36103071773349715, |
|
"latency_50": 121.1512925, |
|
"latency_90": 121.64469659999999, |
|
"latency_95": 121.8583837, |
|
"latency_99": 121.94945611, |
|
"latency_999": 122.865046776 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 374, |
|
"throughput": 24.93, |
|
"latency_mean": 40.12498197593583, |
|
"latency_std": 1.289681830262187, |
|
"latency_50": 39.74041, |
|
"latency_90": 42.1230914, |
|
"latency_95": 42.6942433, |
|
"latency_99": 43.790769559999994, |
|
"latency_999": 44.761699371 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 37, |
|
"throughput": 2.47, |
|
"latency_mean": 407.8962844324324, |
|
"latency_std": 1.0450568064054995, |
|
"latency_50": 407.71793, |
|
"latency_90": 409.2667942, |
|
"latency_95": 409.5479284, |
|
"latency_99": 410.92905384, |
|
"latency_999": 411.387726384 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 100, |
|
"throughput": 6.67, |
|
"latency_mean": 151.48524175, |
|
"latency_std": 2.974018003966197, |
|
"latency_50": 151.4110505, |
|
"latency_90": 155.56673659999998, |
|
"latency_95": 156.83145130000003, |
|
"latency_99": 159.26098265000002, |
|
"latency_999": 159.893205065 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 209, |
|
"throughput": 13.93, |
|
"latency_mean": 72.10892416267941, |
|
"latency_std": 0.9442605052158691, |
|
"latency_50": 72.232233, |
|
"latency_90": 73.24587679999999, |
|
"latency_95": 73.5688604, |
|
"latency_99": 74.49961544, |
|
"latency_999": 74.99024779999999 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 689, |
|
"throughput": 45.93, |
|
"latency_mean": 21.802329773584905, |
|
"latency_std": 1.2356400802889618, |
|
"latency_50": 21.462006, |
|
"latency_90": 23.394015399999997, |
|
"latency_95": 23.461499, |
|
"latency_99": 23.60606868, |
|
"latency_999": 23.790571160000002 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 134, |
|
"throughput": 8.93, |
|
"latency_mean": 112.67185172388061, |
|
"latency_std": 9.561218465080065, |
|
"latency_50": 117.305159, |
|
"latency_90": 118.676979, |
|
"latency_95": 119.01818645, |
|
"latency_99": 119.38788219, |
|
"latency_999": 119.960128779 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 347, |
|
"throughput": 23.13, |
|
"latency_mean": 43.26179646974064, |
|
"latency_std": 0.18815052466746143, |
|
"latency_50": 43.253195, |
|
"latency_90": 43.494735799999994, |
|
"latency_95": 43.5794423, |
|
"latency_99": 43.73514478, |
|
"latency_999": 44.139542132 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 251, |
|
"throughput": 16.73, |
|
"latency_mean": 59.92467478486056, |
|
"latency_std": 0.5304618192835174, |
|
"latency_50": 59.959358, |
|
"latency_90": 60.55806, |
|
"latency_95": 60.8509665, |
|
"latency_99": 61.1526405, |
|
"latency_999": 61.21307125 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 1191, |
|
"throughput": 79.4, |
|
"latency_mean": 12.6018126565911, |
|
"latency_std": 0.417689097037675, |
|
"latency_50": 12.531897, |
|
"latency_90": 13.148671, |
|
"latency_95": 13.431785, |
|
"latency_99": 13.783848599999999, |
|
"latency_999": 14.088670549999993 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 70, |
|
"throughput": 4.67, |
|
"latency_mean": 215.0252773857143, |
|
"latency_std": 8.088753831054484, |
|
"latency_50": 216.4045865, |
|
"latency_90": 218.0282024, |
|
"latency_95": 218.17546385, |
|
"latency_99": 219.1229224, |
|
"latency_999": 219.88093984 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 193, |
|
"throughput": 12.87, |
|
"latency_mean": 78.03187068393781, |
|
"latency_std": 5.702629213049006, |
|
"latency_50": 75.158498, |
|
"latency_90": 88.1716502, |
|
"latency_95": 88.3947608, |
|
"latency_99": 88.80716235999999, |
|
"latency_999": 89.147765224 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"exact_match": 82.3, |
|
"f1": 87.2318519258519 |
|
}, |
|
"optimized": { |
|
"exact_match": 80.6, |
|
"f1": 86.09652042402038 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": 1000, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
}, |
|
{ |
|
"model_name_or_path": "distilbert-base-uncased-distilled-squad", |
|
"task": "question-answering", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "squad", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"question": "question", |
|
"context": "context" |
|
}, |
|
"ref_keys": [ |
|
"answers" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add", |
|
"MatMul" |
|
], |
|
"node_exclusion": [], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.086\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 70, |
|
"throughput": 4.67, |
|
"latency_mean": 214.7585842714286, |
|
"latency_std": 1.0748282952739399, |
|
"latency_50": 214.6217865, |
|
"latency_90": 216.33038530000002, |
|
"latency_95": 216.71350644999998, |
|
"latency_99": 217.10804912999998, |
|
"latency_999": 217.27182111300002 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 173, |
|
"throughput": 11.53, |
|
"latency_mean": 87.19321883815029, |
|
"latency_std": 0.4127804706511856, |
|
"latency_50": 87.115246, |
|
"latency_90": 87.827362, |
|
"latency_95": 87.9646276, |
|
"latency_99": 88.20961412, |
|
"latency_999": 88.528557128 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 69, |
|
"throughput": 4.6, |
|
"latency_mean": 220.37778250724637, |
|
"latency_std": 0.6723770956181739, |
|
"latency_50": 220.31347, |
|
"latency_90": 221.0006854, |
|
"latency_95": 221.6680164, |
|
"latency_99": 222.58888876, |
|
"latency_999": 222.634509076 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 179, |
|
"throughput": 11.93, |
|
"latency_mean": 84.18192030726257, |
|
"latency_std": 1.7207141010005933, |
|
"latency_50": 84.029571, |
|
"latency_90": 86.4943866, |
|
"latency_95": 87.52624759999999, |
|
"latency_99": 88.34396318, |
|
"latency_999": 89.769148814 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 165, |
|
"throughput": 11.0, |
|
"latency_mean": 90.97119612121213, |
|
"latency_std": 1.6585097335652936, |
|
"latency_50": 90.91036, |
|
"latency_90": 93.046208, |
|
"latency_95": 93.5801342, |
|
"latency_99": 95.02802792, |
|
"latency_999": 95.151213372 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 362, |
|
"throughput": 24.13, |
|
"latency_mean": 41.510697558011046, |
|
"latency_std": 2.6850168617333043, |
|
"latency_50": 40.5225655, |
|
"latency_90": 45.749891299999994, |
|
"latency_95": 45.84510315, |
|
"latency_99": 46.05479407, |
|
"latency_999": 46.304120276 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 157, |
|
"throughput": 10.47, |
|
"latency_mean": 95.77657172611464, |
|
"latency_std": 10.384333636895294, |
|
"latency_50": 91.24492, |
|
"latency_90": 118.03857640000001, |
|
"latency_95": 119.1807458, |
|
"latency_99": 120.53466472, |
|
"latency_999": 121.96483704799999 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 369, |
|
"throughput": 24.6, |
|
"latency_mean": 40.65512325745258, |
|
"latency_std": 2.8634757589258784, |
|
"latency_50": 42.886645, |
|
"latency_90": 43.2775952, |
|
"latency_95": 43.3398654, |
|
"latency_99": 43.578850839999994, |
|
"latency_999": 43.896356176 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 37, |
|
"throughput": 2.47, |
|
"latency_mean": 407.3405624594595, |
|
"latency_std": 0.9446139169585147, |
|
"latency_50": 407.370805, |
|
"latency_90": 408.153985, |
|
"latency_95": 408.5356864, |
|
"latency_99": 410.3574352, |
|
"latency_999": 411.19967482 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 98, |
|
"throughput": 6.53, |
|
"latency_mean": 154.55170329591837, |
|
"latency_std": 3.2466049439500617, |
|
"latency_50": 153.957612, |
|
"latency_90": 159.2587284, |
|
"latency_95": 159.9041657, |
|
"latency_99": 163.49634651, |
|
"latency_999": 165.108355851 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 206, |
|
"throughput": 13.73, |
|
"latency_mean": 73.15383589805825, |
|
"latency_std": 1.109006864241087, |
|
"latency_50": 72.793732, |
|
"latency_90": 75.0387975, |
|
"latency_95": 75.40630425, |
|
"latency_99": 76.2248304, |
|
"latency_999": 80.54231973499994 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 725, |
|
"throughput": 48.33, |
|
"latency_mean": 20.701377126896553, |
|
"latency_std": 0.5521964132279599, |
|
"latency_50": 20.611305, |
|
"latency_90": 21.5047578, |
|
"latency_95": 21.702541800000002, |
|
"latency_99": 22.15760376, |
|
"latency_999": 24.283321767999993 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 312, |
|
"throughput": 20.8, |
|
"latency_mean": 48.10309290064102, |
|
"latency_std": 0.5452673615135677, |
|
"latency_50": 48.1016005, |
|
"latency_90": 48.788028, |
|
"latency_95": 48.96113875, |
|
"latency_99": 49.29684296, |
|
"latency_999": 49.564814242 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 2102, |
|
"throughput": 140.13, |
|
"latency_mean": 7.138094656517603, |
|
"latency_std": 0.28361271957621664, |
|
"latency_50": 7.1017695, |
|
"latency_90": 7.5524614, |
|
"latency_95": 7.751003399999999, |
|
"latency_99": 7.9702024399999996, |
|
"latency_999": 8.201174685999996 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 252, |
|
"throughput": 16.8, |
|
"latency_mean": 59.635192896825394, |
|
"latency_std": 0.5367633151420198, |
|
"latency_50": 59.6327055, |
|
"latency_90": 60.299356, |
|
"latency_95": 60.47347955, |
|
"latency_99": 60.860342730000006, |
|
"latency_999": 60.983657111 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 1132, |
|
"throughput": 75.47, |
|
"latency_mean": 13.252757100706713, |
|
"latency_std": 0.7118720708722511, |
|
"latency_50": 13.7066955, |
|
"latency_90": 13.920642699999998, |
|
"latency_95": 13.9857479, |
|
"latency_99": 14.104317450000002, |
|
"latency_999": 14.310801054999994 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 261, |
|
"throughput": 17.4, |
|
"latency_mean": 57.64333686206896, |
|
"latency_std": 1.0745535083532258, |
|
"latency_50": 57.643633, |
|
"latency_90": 59.099249, |
|
"latency_95": 59.402868, |
|
"latency_99": 59.834367, |
|
"latency_999": 60.372973120000005 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 641, |
|
"throughput": 42.73, |
|
"latency_mean": 23.416976716068643, |
|
"latency_std": 0.819280052981588, |
|
"latency_50": 23.204464, |
|
"latency_90": 24.644357, |
|
"latency_95": 24.972543, |
|
"latency_99": 25.292027, |
|
"latency_999": 26.127226000000014 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"exact_match": 82.3, |
|
"f1": 87.2318519258519 |
|
}, |
|
"optimized": { |
|
"exact_match": 80.6, |
|
"f1": 86.09652042402038 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": 1000, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
}, |
|
{ |
|
"model_name_or_path": "distilbert-base-uncased-distilled-squad", |
|
"task": "question-answering", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "squad", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"question": "question", |
|
"context": "context" |
|
}, |
|
"ref_keys": [ |
|
"answers" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add" |
|
], |
|
"node_exclusion": [], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3099.804\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 74, |
|
"throughput": 4.93, |
|
"latency_mean": 203.90076075675674, |
|
"latency_std": 21.596870608446135, |
|
"latency_50": 219.806601, |
|
"latency_90": 222.28046669999998, |
|
"latency_95": 222.6357457, |
|
"latency_99": 223.52929084000002, |
|
"latency_999": 223.571662084 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 86, |
|
"throughput": 5.73, |
|
"latency_mean": 175.15544305813953, |
|
"latency_std": 18.874085885764828, |
|
"latency_50": 167.724299, |
|
"latency_90": 218.6726605, |
|
"latency_95": 218.96398125, |
|
"latency_99": 219.63268945, |
|
"latency_999": 219.994796845 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 252, |
|
"throughput": 16.8, |
|
"latency_mean": 59.620304126984124, |
|
"latency_std": 0.9369837443196661, |
|
"latency_50": 59.6408835, |
|
"latency_90": 60.899260700000006, |
|
"latency_95": 61.2446919, |
|
"latency_99": 61.72324802, |
|
"latency_999": 62.219581975000004 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 509, |
|
"throughput": 33.93, |
|
"latency_mean": 29.505452408644402, |
|
"latency_std": 0.10477107837580692, |
|
"latency_50": 29.494157, |
|
"latency_90": 29.626543, |
|
"latency_95": 29.6817152, |
|
"latency_99": 29.80756116, |
|
"latency_999": 30.165765552000007 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 127, |
|
"throughput": 8.47, |
|
"latency_mean": 118.58148563779527, |
|
"latency_std": 0.4419650117831178, |
|
"latency_50": 118.534868, |
|
"latency_90": 119.0686048, |
|
"latency_95": 119.2944373, |
|
"latency_99": 120.15261722, |
|
"latency_999": 120.63352342200001 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 141, |
|
"throughput": 9.4, |
|
"latency_mean": 106.55073221985815, |
|
"latency_std": 0.3944079713253884, |
|
"latency_50": 106.498131, |
|
"latency_90": 106.921763, |
|
"latency_95": 107.019295, |
|
"latency_99": 107.93368059999999, |
|
"latency_999": 109.45881622000002 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 440, |
|
"throughput": 29.33, |
|
"latency_mean": 34.126477825, |
|
"latency_std": 1.0283869127760359, |
|
"latency_50": 34.086549, |
|
"latency_90": 35.5103139, |
|
"latency_95": 35.82743535, |
|
"latency_99": 36.42215259, |
|
"latency_999": 36.908125651999995 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 882, |
|
"throughput": 58.8, |
|
"latency_mean": 17.017291861678004, |
|
"latency_std": 0.08720510862403949, |
|
"latency_50": 17.0091915, |
|
"latency_90": 17.1355021, |
|
"latency_95": 17.17376905, |
|
"latency_99": 17.25574468, |
|
"latency_999": 17.338018466 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 168, |
|
"throughput": 11.2, |
|
"latency_mean": 89.80876792857143, |
|
"latency_std": 2.724045354234363, |
|
"latency_50": 89.23447, |
|
"latency_90": 92.91518060000001, |
|
"latency_95": 95.965778, |
|
"latency_99": 99.1532608, |
|
"latency_999": 100.916031267 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 165, |
|
"throughput": 11.0, |
|
"latency_mean": 91.00189352121211, |
|
"latency_std": 11.013642722583302, |
|
"latency_50": 85.592419, |
|
"latency_90": 104.2900912, |
|
"latency_95": 104.361947, |
|
"latency_99": 104.4810248, |
|
"latency_999": 104.49576398800001 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 74, |
|
"throughput": 4.93, |
|
"latency_mean": 204.2916795945946, |
|
"latency_std": 20.441719346331414, |
|
"latency_50": 214.991102, |
|
"latency_90": 216.5581006, |
|
"latency_95": 217.0407147, |
|
"latency_99": 217.41635044999998, |
|
"latency_999": 217.686203345 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 71, |
|
"throughput": 4.73, |
|
"latency_mean": 212.32542294366195, |
|
"latency_std": 0.531422234525691, |
|
"latency_50": 212.331527, |
|
"latency_90": 212.977908, |
|
"latency_95": 213.082789, |
|
"latency_99": 213.5128688, |
|
"latency_999": 214.10746658000002 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 49, |
|
"throughput": 3.27, |
|
"latency_mean": 309.52593997959184, |
|
"latency_std": 3.142891825034627, |
|
"latency_50": 309.382131, |
|
"latency_90": 314.37877760000003, |
|
"latency_95": 315.6069922, |
|
"latency_99": 315.9048882, |
|
"latency_999": 315.94479852 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 34, |
|
"throughput": 2.27, |
|
"latency_mean": 445.23998747058823, |
|
"latency_std": 1.320393415283763, |
|
"latency_50": 445.3137435, |
|
"latency_90": 446.8903998, |
|
"latency_95": 447.35347025, |
|
"latency_99": 447.77492344999996, |
|
"latency_999": 447.88785324500003 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 268, |
|
"throughput": 17.87, |
|
"latency_mean": 55.97453225746269, |
|
"latency_std": 1.0305715235126534, |
|
"latency_50": 55.880397, |
|
"latency_90": 57.251859200000006, |
|
"latency_95": 57.9394124, |
|
"latency_99": 58.42131721, |
|
"latency_999": 58.653711982 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 280, |
|
"throughput": 18.67, |
|
"latency_mean": 53.5968861, |
|
"latency_std": 0.09504994641745691, |
|
"latency_50": 53.589719, |
|
"latency_90": 53.727543700000005, |
|
"latency_95": 53.763608700000006, |
|
"latency_99": 53.84968344, |
|
"latency_999": 53.900073552 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 207, |
|
"throughput": 13.8, |
|
"latency_mean": 72.80511813526569, |
|
"latency_std": 0.28605904617544076, |
|
"latency_50": 72.792014, |
|
"latency_90": 73.14956740000001, |
|
"latency_95": 73.2242503, |
|
"latency_99": 73.44227836, |
|
"latency_999": 74.268179328 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 263, |
|
"throughput": 17.53, |
|
"latency_mean": 57.04931692775665, |
|
"latency_std": 0.1852668604685806, |
|
"latency_50": 57.008233, |
|
"latency_90": 57.251367200000004, |
|
"latency_95": 57.3371062, |
|
"latency_99": 57.79880592, |
|
"latency_999": 58.027793358000004 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"exact_match": 82.3, |
|
"f1": 87.2318519258519 |
|
}, |
|
"optimized": { |
|
"exact_match": 82.3, |
|
"f1": 87.2318519258519 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": 1000, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
}, |
|
{ |
|
"model_name_or_path": "distilbert-base-uncased-distilled-squad", |
|
"task": "question-answering", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "squad", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"question": "question", |
|
"context": "context" |
|
}, |
|
"ref_keys": [ |
|
"answers" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "static", |
|
"operators_to_quantize": [ |
|
"Add" |
|
], |
|
"node_exclusion": [], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3099.838\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 340, |
|
"throughput": 22.67, |
|
"latency_mean": 44.18998163235294, |
|
"latency_std": 1.4697810745421789, |
|
"latency_50": 43.944576, |
|
"latency_90": 46.3169695, |
|
"latency_95": 46.85474685, |
|
"latency_99": 47.600308, |
|
"latency_999": 48.253145861 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 459, |
|
"throughput": 30.6, |
|
"latency_mean": 32.72214506100218, |
|
"latency_std": 1.383428951575046, |
|
"latency_50": 32.429874, |
|
"latency_90": 34.6945812, |
|
"latency_95": 35.1220015, |
|
"latency_99": 36.2150666, |
|
"latency_999": 36.859092831999995 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 68, |
|
"throughput": 4.53, |
|
"latency_mean": 221.9408882647059, |
|
"latency_std": 1.4998283620039232, |
|
"latency_50": 221.727837, |
|
"latency_90": 223.742314, |
|
"latency_95": 224.84834684999998, |
|
"latency_99": 226.48472676999998, |
|
"latency_999": 227.941314877 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 61, |
|
"throughput": 4.07, |
|
"latency_mean": 246.87498426229507, |
|
"latency_std": 28.403201366430647, |
|
"latency_50": 265.371572, |
|
"latency_90": 266.689959, |
|
"latency_95": 266.835504, |
|
"latency_99": 267.98315759999997, |
|
"latency_999": 268.11036216 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 310, |
|
"throughput": 20.67, |
|
"latency_mean": 48.538128174193545, |
|
"latency_std": 1.0825802524617727, |
|
"latency_50": 48.426445, |
|
"latency_90": 49.9468469, |
|
"latency_95": 50.608303299999996, |
|
"latency_99": 51.1435016, |
|
"latency_999": 51.579964874 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 709, |
|
"throughput": 47.27, |
|
"latency_mean": 21.175880187588152, |
|
"latency_std": 1.3509200916441064, |
|
"latency_50": 21.880383, |
|
"latency_90": 22.094095199999998, |
|
"latency_95": 22.1335476, |
|
"latency_99": 22.22487572, |
|
"latency_999": 22.332830584000003 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 250, |
|
"throughput": 16.67, |
|
"latency_mean": 60.190009684, |
|
"latency_std": 6.476253661495298, |
|
"latency_50": 56.6032025, |
|
"latency_90": 70.51879790000001, |
|
"latency_95": 71.00682115, |
|
"latency_99": 71.97614781, |
|
"latency_999": 72.419652896 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 223, |
|
"throughput": 14.87, |
|
"latency_mean": 67.2927178340807, |
|
"latency_std": 0.144766654312015, |
|
"latency_50": 67.282017, |
|
"latency_90": 67.4824698, |
|
"latency_95": 67.53632420000001, |
|
"latency_99": 67.65417208, |
|
"latency_999": 67.74949128 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 70, |
|
"throughput": 4.67, |
|
"latency_mean": 214.69050597142856, |
|
"latency_std": 0.4979447468736716, |
|
"latency_50": 214.625579, |
|
"latency_90": 215.30233869999998, |
|
"latency_95": 215.40087115, |
|
"latency_99": 216.26991978, |
|
"latency_999": 216.828470778 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 72, |
|
"throughput": 4.8, |
|
"latency_mean": 209.0101843472222, |
|
"latency_std": 23.570857468465338, |
|
"latency_50": 199.064305, |
|
"latency_90": 255.36236, |
|
"latency_95": 255.8589584, |
|
"latency_99": 256.32438673, |
|
"latency_999": 256.494448273 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 210, |
|
"throughput": 14.0, |
|
"latency_mean": 71.66073837142856, |
|
"latency_std": 0.48116978977616937, |
|
"latency_50": 71.5734205, |
|
"latency_90": 72.0439302, |
|
"latency_95": 72.4196435, |
|
"latency_99": 73.94004301999999, |
|
"latency_999": 74.513449128 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 256, |
|
"throughput": 17.07, |
|
"latency_mean": 58.67059760546875, |
|
"latency_std": 1.692148816240866, |
|
"latency_50": 58.4892805, |
|
"latency_90": 60.9907035, |
|
"latency_95": 61.989116, |
|
"latency_99": 62.95026975, |
|
"latency_999": 63.22841489 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 49, |
|
"throughput": 3.27, |
|
"latency_mean": 310.00153851020406, |
|
"latency_std": 2.7851646055699177, |
|
"latency_50": 309.754002, |
|
"latency_90": 313.0454142, |
|
"latency_95": 314.881432, |
|
"latency_99": 317.12773244, |
|
"latency_999": 317.61191674400004 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 37, |
|
"throughput": 2.47, |
|
"latency_mean": 406.5427697027027, |
|
"latency_std": 6.185204572502198, |
|
"latency_50": 406.487553, |
|
"latency_90": 414.2124184, |
|
"latency_95": 417.1470922, |
|
"latency_99": 419.61633636, |
|
"latency_999": 419.968637436 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 125, |
|
"throughput": 8.33, |
|
"latency_mean": 120.571896528, |
|
"latency_std": 0.47277616561376834, |
|
"latency_50": 120.49457, |
|
"latency_90": 121.2047576, |
|
"latency_95": 121.5551152, |
|
"latency_99": 121.83896872, |
|
"latency_999": 122.05622189200001 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 145, |
|
"throughput": 9.67, |
|
"latency_mean": 103.6226343724138, |
|
"latency_std": 2.9157268721681353, |
|
"latency_50": 103.111948, |
|
"latency_90": 107.64045859999999, |
|
"latency_95": 108.98621659999999, |
|
"latency_99": 112.292974, |
|
"latency_999": 112.668847504 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 126, |
|
"throughput": 8.4, |
|
"latency_mean": 119.27741188888889, |
|
"latency_std": 0.8764285887799853, |
|
"latency_50": 119.307692, |
|
"latency_90": 120.473393, |
|
"latency_95": 120.7184835, |
|
"latency_99": 121.2451975, |
|
"latency_999": 121.262551875 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 118, |
|
"throughput": 7.87, |
|
"latency_mean": 127.78634824576271, |
|
"latency_std": 0.3648261747760306, |
|
"latency_50": 127.7387245, |
|
"latency_90": 128.2727152, |
|
"latency_95": 128.37564195000002, |
|
"latency_99": 128.60643422, |
|
"latency_999": 129.373907065 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"exact_match": 82.3, |
|
"f1": 87.2318519258519 |
|
}, |
|
"optimized": { |
|
"exact_match": 59.8, |
|
"f1": 69.2166994698964 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": 1000, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
}, |
|
{ |
|
"model_name_or_path": "distilbert-base-uncased-distilled-squad", |
|
"task": "question-answering", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "squad", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"question": "question", |
|
"context": "context" |
|
}, |
|
"ref_keys": [ |
|
"answers" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "static", |
|
"operators_to_quantize": [ |
|
"Add", |
|
"MatMul" |
|
], |
|
"node_exclusion": [], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3097.368\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 311, |
|
"throughput": 20.73, |
|
"latency_mean": 48.27436349517685, |
|
"latency_std": 1.040822281053826, |
|
"latency_50": 48.187545, |
|
"latency_90": 49.621862, |
|
"latency_95": 50.0009575, |
|
"latency_99": 50.911865399999996, |
|
"latency_999": 51.49017933 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 1292, |
|
"throughput": 86.13, |
|
"latency_mean": 11.618419383126936, |
|
"latency_std": 0.10444681939743777, |
|
"latency_50": 11.602171, |
|
"latency_90": 11.748442599999999, |
|
"latency_95": 11.79717875, |
|
"latency_99": 11.94069171, |
|
"latency_999": 12.056363528000002 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 69, |
|
"throughput": 4.6, |
|
"latency_mean": 220.32118042028983, |
|
"latency_std": 0.9850709045782986, |
|
"latency_50": 220.356704, |
|
"latency_90": 221.59199759999998, |
|
"latency_95": 221.9246008, |
|
"latency_99": 223.13476428, |
|
"latency_999": 223.77970762799998 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 123, |
|
"throughput": 8.2, |
|
"latency_mean": 122.36457313821138, |
|
"latency_std": 10.835259832058163, |
|
"latency_50": 121.072407, |
|
"latency_90": 135.5895634, |
|
"latency_95": 135.8899157, |
|
"latency_99": 138.01084006, |
|
"latency_999": 139.175728762 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 162, |
|
"throughput": 10.8, |
|
"latency_mean": 92.77228116049382, |
|
"latency_std": 9.463275951760943, |
|
"latency_50": 89.6388245, |
|
"latency_90": 117.9440383, |
|
"latency_95": 119.128948, |
|
"latency_99": 119.91939433999998, |
|
"latency_999": 120.517662241 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 234, |
|
"throughput": 15.6, |
|
"latency_mean": 64.20727750854701, |
|
"latency_std": 0.31098450167906233, |
|
"latency_50": 64.201227, |
|
"latency_90": 64.6116797, |
|
"latency_95": 64.72937155, |
|
"latency_99": 65.065812, |
|
"latency_999": 65.162602081 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 37, |
|
"throughput": 2.47, |
|
"latency_mean": 408.1440117567568, |
|
"latency_std": 0.7971971555607033, |
|
"latency_50": 408.020052, |
|
"latency_90": 409.07254439999997, |
|
"latency_95": 409.467417, |
|
"latency_99": 410.72945427999997, |
|
"latency_999": 411.308799328 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 64, |
|
"throughput": 4.27, |
|
"latency_mean": 236.944504875, |
|
"latency_std": 20.551884369675392, |
|
"latency_50": 227.635955, |
|
"latency_90": 272.39387389999996, |
|
"latency_95": 272.9781154, |
|
"latency_99": 278.51420387999997, |
|
"latency_999": 282.636704388 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 248, |
|
"throughput": 16.53, |
|
"latency_mean": 60.48698351209678, |
|
"latency_std": 0.5347970640782516, |
|
"latency_50": 60.474966, |
|
"latency_90": 61.153837700000004, |
|
"latency_95": 61.448808, |
|
"latency_99": 61.726770509999994, |
|
"latency_999": 61.795131821000005 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 806, |
|
"throughput": 53.73, |
|
"latency_mean": 18.618812374689824, |
|
"latency_std": 0.9875542199892051, |
|
"latency_50": 18.4731305, |
|
"latency_90": 20.025972, |
|
"latency_95": 20.843049, |
|
"latency_99": 21.69578405, |
|
"latency_999": 22.082188700000003 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 266, |
|
"throughput": 17.73, |
|
"latency_mean": 56.56638917669173, |
|
"latency_std": 1.1636111458496279, |
|
"latency_50": 56.435677, |
|
"latency_90": 58.201028, |
|
"latency_95": 58.79177375, |
|
"latency_99": 59.38910765, |
|
"latency_999": 59.7182885 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 486, |
|
"throughput": 32.4, |
|
"latency_mean": 30.90172244855967, |
|
"latency_std": 1.7590904451420482, |
|
"latency_50": 30.4496635, |
|
"latency_90": 33.697194, |
|
"latency_95": 34.585768, |
|
"latency_99": 35.691296949999995, |
|
"latency_999": 36.21122576 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 72, |
|
"throughput": 4.8, |
|
"latency_mean": 209.6628640277778, |
|
"latency_std": 13.976074504609217, |
|
"latency_50": 214.2633815, |
|
"latency_90": 215.4051871, |
|
"latency_95": 215.8541272, |
|
"latency_99": 218.25931918000003, |
|
"latency_999": 221.387059318 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 141, |
|
"throughput": 9.4, |
|
"latency_mean": 106.43290490780142, |
|
"latency_std": 3.23067147706519, |
|
"latency_50": 106.433161, |
|
"latency_90": 110.193956, |
|
"latency_95": 112.048999, |
|
"latency_99": 115.729673, |
|
"latency_999": 116.92148872000001 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 144, |
|
"throughput": 9.6, |
|
"latency_mean": 104.92533156944445, |
|
"latency_std": 14.860284902135193, |
|
"latency_50": 94.1759405, |
|
"latency_90": 121.35962690000001, |
|
"latency_95": 121.7929045, |
|
"latency_99": 122.82086836, |
|
"latency_999": 122.95308226099999 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 260, |
|
"throughput": 17.33, |
|
"latency_mean": 57.90458781923077, |
|
"latency_std": 3.0707474582498206, |
|
"latency_50": 57.1554325, |
|
"latency_90": 62.939566, |
|
"latency_95": 64.11821884999999, |
|
"latency_99": 66.00585256000001, |
|
"latency_999": 66.83383903299999 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 212, |
|
"throughput": 14.13, |
|
"latency_mean": 71.04031975, |
|
"latency_std": 1.2335423114384807, |
|
"latency_50": 70.953013, |
|
"latency_90": 72.93224040000001, |
|
"latency_95": 73.2437023, |
|
"latency_99": 74.43362789, |
|
"latency_999": 75.253044574 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 396, |
|
"throughput": 26.4, |
|
"latency_mean": 37.94484309343434, |
|
"latency_std": 0.16381138575361315, |
|
"latency_50": 37.9182705, |
|
"latency_90": 38.176733, |
|
"latency_95": 38.23742675, |
|
"latency_99": 38.305533200000006, |
|
"latency_999": 38.36262916 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"exact_match": 82.3, |
|
"f1": 87.2318519258519 |
|
}, |
|
"optimized": { |
|
"exact_match": 54.5, |
|
"f1": 64.29164315407452 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": 1000, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
}, |
|
{ |
|
"model_name_or_path": "distilbert-base-uncased-distilled-squad", |
|
"task": "question-answering", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "squad", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"question": "question", |
|
"context": "context" |
|
}, |
|
"ref_keys": [ |
|
"answers" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "static", |
|
"operators_to_quantize": [ |
|
"Add", |
|
"MatMul" |
|
], |
|
"node_exclusion": [ |
|
"layernorm", |
|
"gelu", |
|
"residual", |
|
"gather", |
|
"softmax" |
|
], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3099.988\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 37, |
|
"throughput": 2.47, |
|
"latency_mean": 407.5374461081081, |
|
"latency_std": 0.9308811800770912, |
|
"latency_50": 407.503129, |
|
"latency_90": 408.4859978, |
|
"latency_95": 409.1130008, |
|
"latency_99": 410.21976224, |
|
"latency_999": 410.505260024 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 67, |
|
"throughput": 4.47, |
|
"latency_mean": 224.46013014925373, |
|
"latency_std": 1.2067813257810087, |
|
"latency_50": 224.222622, |
|
"latency_90": 225.39195980000002, |
|
"latency_95": 226.41629740000002, |
|
"latency_99": 228.78541636000003, |
|
"latency_999": 230.883278236 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 70, |
|
"throughput": 4.67, |
|
"latency_mean": 215.47036322857141, |
|
"latency_std": 5.763796300712505, |
|
"latency_50": 216.127969, |
|
"latency_90": 218.3204449, |
|
"latency_95": 218.88104615, |
|
"latency_99": 220.6371353, |
|
"latency_999": 223.19921363 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 144, |
|
"throughput": 9.6, |
|
"latency_mean": 104.44696947222222, |
|
"latency_std": 0.4207091347287099, |
|
"latency_50": 104.4406845, |
|
"latency_90": 104.9043417, |
|
"latency_95": 105.0561619, |
|
"latency_99": 105.80782608999999, |
|
"latency_999": 106.49383402599999 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 213, |
|
"throughput": 14.2, |
|
"latency_mean": 70.45866511737088, |
|
"latency_std": 0.2878721089411343, |
|
"latency_50": 70.425888, |
|
"latency_90": 70.864623, |
|
"latency_95": 70.9372132, |
|
"latency_99": 71.19328732, |
|
"latency_999": 71.41624544400001 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 603, |
|
"throughput": 40.2, |
|
"latency_mean": 24.88167712437811, |
|
"latency_std": 1.0019642978162417, |
|
"latency_50": 24.758782, |
|
"latency_90": 26.4062178, |
|
"latency_95": 26.7184647, |
|
"latency_99": 27.097667720000004, |
|
"latency_999": 27.482882742000008 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 255, |
|
"throughput": 17.0, |
|
"latency_mean": 58.937400823529416, |
|
"latency_std": 0.9323535241532246, |
|
"latency_50": 58.875322, |
|
"latency_90": 59.8057036, |
|
"latency_95": 60.399553299999994, |
|
"latency_99": 61.85374206, |
|
"latency_999": 62.514518306 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 992, |
|
"throughput": 66.13, |
|
"latency_mean": 15.12884672983871, |
|
"latency_std": 0.9866967082432004, |
|
"latency_50": 14.7669305, |
|
"latency_90": 16.4293394, |
|
"latency_95": 16.48365125, |
|
"latency_99": 16.62896306, |
|
"latency_999": 16.872082149999997 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 213, |
|
"throughput": 14.2, |
|
"latency_mean": 70.56589055399061, |
|
"latency_std": 0.8249341966205053, |
|
"latency_50": 70.62431, |
|
"latency_90": 71.616199, |
|
"latency_95": 71.9370058, |
|
"latency_99": 72.3793054, |
|
"latency_999": 72.699766524 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 544, |
|
"throughput": 36.27, |
|
"latency_mean": 27.58882950367647, |
|
"latency_std": 1.1939137183142827, |
|
"latency_50": 27.347617, |
|
"latency_90": 29.5275534, |
|
"latency_95": 29.9602636, |
|
"latency_99": 30.7295052, |
|
"latency_999": 30.957031331 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 78, |
|
"throughput": 5.2, |
|
"latency_mean": 192.63010338461538, |
|
"latency_std": 24.23487329273834, |
|
"latency_50": 173.7823215, |
|
"latency_90": 222.2715464, |
|
"latency_95": 223.3036971, |
|
"latency_99": 225.55757293000002, |
|
"latency_999": 226.406006593 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 132, |
|
"throughput": 8.8, |
|
"latency_mean": 113.81753161363636, |
|
"latency_std": 0.6467249477070305, |
|
"latency_50": 113.767756, |
|
"latency_90": 114.6572987, |
|
"latency_95": 114.88671805, |
|
"latency_99": 115.46696548, |
|
"latency_999": 115.9667929 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 169, |
|
"throughput": 11.27, |
|
"latency_mean": 89.13836758579882, |
|
"latency_std": 1.8355160566154043, |
|
"latency_50": 89.192802, |
|
"latency_90": 91.4133886, |
|
"latency_95": 92.51820620000001, |
|
"latency_99": 93.29504212, |
|
"latency_999": 93.695905456 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 288, |
|
"throughput": 19.2, |
|
"latency_mean": 52.089132309027775, |
|
"latency_std": 0.19887005817696832, |
|
"latency_50": 52.075364, |
|
"latency_90": 52.3625353, |
|
"latency_95": 52.4649237, |
|
"latency_99": 52.58880765, |
|
"latency_999": 52.706969511 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 428, |
|
"throughput": 28.53, |
|
"latency_mean": 35.0650053457944, |
|
"latency_std": 1.9416905978175623, |
|
"latency_50": 34.4812805, |
|
"latency_90": 38.0941055, |
|
"latency_95": 38.495843199999996, |
|
"latency_99": 39.51390012, |
|
"latency_999": 42.14187876599995 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 1628, |
|
"throughput": 108.53, |
|
"latency_mean": 9.214644947174447, |
|
"latency_std": 0.0940311186126701, |
|
"latency_50": 9.205694, |
|
"latency_90": 9.3333551, |
|
"latency_95": 9.36865875, |
|
"latency_99": 9.4546762, |
|
"latency_999": 9.629165349000006 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 125, |
|
"throughput": 8.33, |
|
"latency_mean": 120.566423736, |
|
"latency_std": 0.32773176890043826, |
|
"latency_50": 120.535469, |
|
"latency_90": 121.00340340000001, |
|
"latency_95": 121.16425579999999, |
|
"latency_99": 121.27308992, |
|
"latency_999": 121.643879456 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 277, |
|
"throughput": 18.47, |
|
"latency_mean": 54.24849066787004, |
|
"latency_std": 0.19047805788515895, |
|
"latency_50": 54.224382, |
|
"latency_90": 54.5044332, |
|
"latency_95": 54.6109856, |
|
"latency_99": 54.75149856, |
|
"latency_999": 54.862251523999994 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"exact_match": 82.3, |
|
"f1": 87.2318519258519 |
|
}, |
|
"optimized": { |
|
"exact_match": 72.9, |
|
"f1": 79.96371998744281 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": 1000, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
}, |
|
{ |
|
"model_name_or_path": "distilbert-base-uncased-distilled-squad", |
|
"task": "question-answering", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "squad", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"question": "question", |
|
"context": "context" |
|
}, |
|
"ref_keys": [ |
|
"answers" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add" |
|
], |
|
"node_exclusion": [ |
|
"layernorm", |
|
"gelu", |
|
"residual", |
|
"gather", |
|
"softmax" |
|
], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3099.997\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 70, |
|
"throughput": 4.67, |
|
"latency_mean": 216.4796847285714, |
|
"latency_std": 0.5319523840331125, |
|
"latency_50": 216.4132115, |
|
"latency_90": 217.2235808, |
|
"latency_95": 217.43512135, |
|
"latency_99": 217.93726658, |
|
"latency_999": 218.004904658 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 93, |
|
"throughput": 6.2, |
|
"latency_mean": 162.63612432258063, |
|
"latency_std": 6.7981229484609695, |
|
"latency_50": 160.371024, |
|
"latency_90": 175.9109966, |
|
"latency_95": 178.2932724, |
|
"latency_99": 180.45402764, |
|
"latency_999": 181.166818064 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 250, |
|
"throughput": 16.67, |
|
"latency_mean": 60.13216628, |
|
"latency_std": 1.0214734052265138, |
|
"latency_50": 60.110321, |
|
"latency_90": 61.3823292, |
|
"latency_95": 61.8370275, |
|
"latency_99": 62.49582684, |
|
"latency_999": 62.834720483999995 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 506, |
|
"throughput": 33.73, |
|
"latency_mean": 29.64927941897233, |
|
"latency_std": 0.09914349596696566, |
|
"latency_50": 29.640438, |
|
"latency_90": 29.7649045, |
|
"latency_95": 29.817656, |
|
"latency_99": 29.927720100000002, |
|
"latency_999": 30.19670687 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 125, |
|
"throughput": 8.33, |
|
"latency_mean": 120.849639712, |
|
"latency_std": 0.9668375634433722, |
|
"latency_50": 120.813037, |
|
"latency_90": 121.9735066, |
|
"latency_95": 122.5098556, |
|
"latency_99": 122.86986592, |
|
"latency_999": 123.22419958 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 141, |
|
"throughput": 9.4, |
|
"latency_mean": 106.49951225531915, |
|
"latency_std": 0.31470398129243726, |
|
"latency_50": 106.442906, |
|
"latency_90": 106.932199, |
|
"latency_95": 107.086644, |
|
"latency_99": 107.3766202, |
|
"latency_999": 107.86804946000001 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 343, |
|
"throughput": 22.87, |
|
"latency_mean": 43.83403140233236, |
|
"latency_std": 6.777393138738352, |
|
"latency_50": 47.974253, |
|
"latency_90": 49.6771042, |
|
"latency_95": 50.217503, |
|
"latency_99": 50.915752839999996, |
|
"latency_999": 52.175437058000014 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 875, |
|
"throughput": 58.33, |
|
"latency_mean": 17.15841726057143, |
|
"latency_std": 0.08566829655094844, |
|
"latency_50": 17.153516, |
|
"latency_90": 17.2748228, |
|
"latency_95": 17.3054081, |
|
"latency_99": 17.36630658, |
|
"latency_999": 17.49840138 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 128, |
|
"throughput": 8.53, |
|
"latency_mean": 117.3791356640625, |
|
"latency_std": 0.8413632402417973, |
|
"latency_50": 117.250864, |
|
"latency_90": 118.50406629999999, |
|
"latency_95": 118.87116745, |
|
"latency_99": 119.4692367, |
|
"latency_999": 119.927835012 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 144, |
|
"throughput": 9.6, |
|
"latency_mean": 104.27652415277778, |
|
"latency_std": 0.3048652909436058, |
|
"latency_50": 104.223559, |
|
"latency_90": 104.6723871, |
|
"latency_95": 104.8562854, |
|
"latency_99": 105.23841804999999, |
|
"latency_999": 105.67265316 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 68, |
|
"throughput": 4.53, |
|
"latency_mean": 221.2224431470588, |
|
"latency_std": 1.118456788421114, |
|
"latency_50": 221.1416945, |
|
"latency_90": 222.8013713, |
|
"latency_95": 223.27910730000002, |
|
"latency_99": 223.56076078, |
|
"latency_999": 223.828592878 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 68, |
|
"throughput": 4.53, |
|
"latency_mean": 221.36998825, |
|
"latency_std": 0.4904801834733391, |
|
"latency_50": 221.3431115, |
|
"latency_90": 221.8755237, |
|
"latency_95": 221.9769379, |
|
"latency_99": 222.760048, |
|
"latency_999": 223.048282 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 209, |
|
"throughput": 13.93, |
|
"latency_mean": 72.04991935406699, |
|
"latency_std": 0.9649173207131073, |
|
"latency_50": 72.140642, |
|
"latency_90": 73.1535642, |
|
"latency_95": 73.46569579999999, |
|
"latency_99": 74.7303538, |
|
"latency_999": 75.40225632 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 280, |
|
"throughput": 18.67, |
|
"latency_mean": 53.682652257142855, |
|
"latency_std": 0.2593595953484514, |
|
"latency_50": 53.628275, |
|
"latency_90": 53.8589829, |
|
"latency_95": 54.04694715, |
|
"latency_99": 55.05985277, |
|
"latency_999": 55.324252431000005 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 235, |
|
"throughput": 15.67, |
|
"latency_mean": 64.03706125957447, |
|
"latency_std": 7.7729420260039, |
|
"latency_50": 58.940644, |
|
"latency_90": 73.7838802, |
|
"latency_95": 74.4361305, |
|
"latency_99": 75.54929478, |
|
"latency_999": 76.00147653 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 300, |
|
"throughput": 20.0, |
|
"latency_mean": 50.13742374666667, |
|
"latency_std": 4.800538669035651, |
|
"latency_50": 47.3348575, |
|
"latency_90": 57.0711271, |
|
"latency_95": 57.1360735, |
|
"latency_99": 57.42901637, |
|
"latency_999": 57.585291638 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 37, |
|
"throughput": 2.47, |
|
"latency_mean": 406.5056369189189, |
|
"latency_std": 0.7541595003868546, |
|
"latency_50": 406.359168, |
|
"latency_90": 407.44441, |
|
"latency_95": 408.065172, |
|
"latency_99": 408.18207568, |
|
"latency_999": 408.195881968 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 39, |
|
"throughput": 2.6, |
|
"latency_mean": 394.84622148717943, |
|
"latency_std": 54.09154075921731, |
|
"latency_50": 441.007729, |
|
"latency_90": 446.218295, |
|
"latency_95": 447.0315637, |
|
"latency_99": 447.1409742, |
|
"latency_999": 447.15734232 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"exact_match": 82.3, |
|
"f1": 87.2318519258519 |
|
}, |
|
"optimized": { |
|
"exact_match": 82.3, |
|
"f1": 87.2318519258519 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": 1000, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
} |
|
] |