[ { "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", "task": "token-classification", "task_args": null, "dataset": { "path": "conll2003", "eval_split": "validation", "data_keys": { "primary": "tokens", "secondary": null }, "ref_keys": [ "ner_tags" ], "name": null, "calibration_split": "train" }, "quantization_approach": "static", "operators_to_quantize": [ "Add", "MatMul" ], "node_exclusion": [ "layernorm", "gelu", "residual", "gather", "softmax" ], "aware_training": false, "per_channel": false, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3102.164\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "6911cc6576c9fd373dfdba597e7a6b3ec37c2063" }, "evaluation": { "time": [ { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 145, "throughput": 9.67, "latency_mean": 103.4629717724138, "latency_std": 14.893062293273225, "latency_50": 91.745478, "latency_90": 119.3452324, "latency_95": 119.4975372, "latency_99": 119.96275956000001, "latency_999": 121.099907936 }, "optimized": { "nb_forwards": 279, "throughput": 18.6, "latency_mean": 53.769141777777776, "latency_std": 0.23148430308679216, "latency_50": 53.715052, "latency_90": 54.059919, "latency_95": 54.2278771, "latency_99": 54.520317659999996, "latency_999": 54.651304412 } } ], "others": { "baseline": { "precision": 0.9358012339503085, "recall": 0.9444631437226523, "f1": 0.9401122372057961, "accuracy": 0.9882013940267124 }, "optimized": { "precision": 0.9038969616908851, "recall": 0.9212386401884888, "f1": 0.912485414235706, "accuracy": 0.9842295860753086 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "distilbert" }, { "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", "task": "token-classification", "task_args": null, "dataset": { "path": "conll2003", "eval_split": "validation", "data_keys": { "primary": "tokens", "secondary": null }, "ref_keys": [ "ner_tags" ], "name": null, "calibration_split": "train" }, "quantization_approach": "static", "operators_to_quantize": [ "Add", "MatMul" ], "node_exclusion": [], "aware_training": false, "per_channel": false, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3161.331\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "6911cc6576c9fd373dfdba597e7a6b3ec37c2063" }, "evaluation": { "time": [ { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 166, "throughput": 11.07, "latency_mean": 90.62444262650602, "latency_std": 1.8014393305543155, "latency_50": 90.6704845, "latency_90": 92.920863, "latency_95": 93.45488975, "latency_99": 94.330746, "latency_999": 94.85841647 }, "optimized": { "nb_forwards": 228, "throughput": 15.2, "latency_mean": 65.86275819736842, "latency_std": 0.8604240016957982, "latency_50": 65.8016715, "latency_90": 66.9237761, "latency_95": 67.35874704999999, "latency_99": 68.89251761, "latency_999": 69.285897298 } } ], "others": { "baseline": { "precision": 0.9358012339503085, "recall": 0.9444631437226523, "f1": 0.9401122372057961, "accuracy": 0.9882013940267124 }, "optimized": { "precision": 0.06543578604398588, "recall": 0.24335240659710536, "f1": 0.10313837375178317, "accuracy": 0.35697597445582335 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "distilbert" } ]