[
    {
        "model_name_or_path": "distilbert-base-uncased-distilled-squad",
        "task": "question-answering",
        "task_args": null,
        "dataset": {
            "path": "squad",
            "eval_split": "validation",
            "data_keys": {
                "question": "question",
                "context": "context"
            },
            "ref_keys": [
                "answers"
            ],
            "name": null,
            "calibration_split": "train"
        },
        "quantization_approach": "static",
        "operators_to_quantize": [
            "Add"
        ],
        "node_exclusion": [
            "layernorm",
            "gelu",
            "residual",
            "gather",
            "softmax"
        ],
        "aware_training": false,
        "per_channel": false,
        "calibration": {
            "method": "minmax",
            "num_calibration_samples": 100,
            "calibration_histogram_percentile": null,
            "calibration_moving_average": null,
            "calibration_moving_average_constant": null
        },
        "framework": "onnxruntime",
        "framework_args": {
            "opset": 11,
            "optimization_level": 1
        },
        "hardware": "Architecture:                    x86_64\nCPU op-mode(s):                  32-bit, 64-bit\nByte Order:                      Little Endian\nAddress sizes:                   46 bits physical, 48 bits virtual\nCPU(s):                          8\nOn-line CPU(s) list:             0-7\nThread(s) per core:              2\nCore(s) per socket:              4\nSocket(s):                       1\nNUMA node(s):                    1\nVendor ID:                       GenuineIntel\nCPU family:                      6\nModel:                           85\nModel name:                      Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping:                        7\nCPU MHz:                         3100.278\nBogoMIPS:                        4999.99\nHypervisor vendor:               KVM\nVirtualization type:             full\nL1d cache:                       128 KiB\nL1i cache:                       128 KiB\nL2 cache:                        4 MiB\nL3 cache:                        35.8 MiB\nNUMA node0 CPU(s):               0-7\nVulnerability Itlb multihit:     KVM: Vulnerable\nVulnerability L1tf:              Mitigation; PTE Inversion\nVulnerability Mds:               Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown:          Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1:        Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2:        Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds:             Not affected\nVulnerability Tsx async abort:   Not affected\nFlags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
        "versions": {
            "transformers": "4.20.1",
            "optimum": "1.2.3.dev0",
            "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
        },
        "evaluation": {
            "time": [
                {
                    "batch_size": 8,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 126,
                        "throughput": 8.4,
                        "latency_mean": 119.09526233333332,
                        "latency_std": 0.7552068299080471,
                        "latency_50": 119.159921,
                        "latency_90": 119.811235,
                        "latency_95": 120.0762565,
                        "latency_99": 121.2652685,
                        "latency_999": 121.44601075
                    },
                    "optimized": {
                        "nb_forwards": 132,
                        "throughput": 8.8,
                        "latency_mean": 114.4251281590909,
                        "latency_std": 0.5121636382229513,
                        "latency_50": 114.325617,
                        "latency_90": 114.8843064,
                        "latency_95": 115.2926259,
                        "latency_99": 116.21156513,
                        "latency_999": 116.902565364
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 440,
                        "throughput": 29.33,
                        "latency_mean": 34.11132023409091,
                        "latency_std": 1.1428837223428832,
                        "latency_50": 34.0117225,
                        "latency_90": 35.7673362,
                        "latency_95": 36.214977399999995,
                        "latency_99": 36.84041992,
                        "latency_999": 37.28137764399999
                    },
                    "optimized": {
                        "nb_forwards": 780,
                        "throughput": 52.0,
                        "latency_mean": 19.232586415384617,
                        "latency_std": 0.19048831587735654,
                        "latency_50": 19.205587,
                        "latency_90": 19.3554198,
                        "latency_95": 19.4416857,
                        "latency_99": 19.747647200000003,
                        "latency_999": 21.31330125800001
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 91,
                        "throughput": 6.07,
                        "latency_mean": 166.1344919010989,
                        "latency_std": 4.663006799959188,
                        "latency_50": 165.748607,
                        "latency_90": 167.573247,
                        "latency_95": 168.185849,
                        "latency_99": 176.2619040999998,
                        "latency_999": 204.17836050999986
                    },
                    "optimized": {
                        "nb_forwards": 68,
                        "throughput": 4.53,
                        "latency_mean": 220.92038330882352,
                        "latency_std": 17.990154740643728,
                        "latency_50": 229.036164,
                        "latency_90": 230.3515418,
                        "latency_95": 230.71870145,
                        "latency_99": 233.38270184,
                        "latency_999": 233.602886084
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 69,
                        "throughput": 4.6,
                        "latency_mean": 220.57769956521742,
                        "latency_std": 0.6686484290829855,
                        "latency_50": 220.51593,
                        "latency_90": 221.12682819999998,
                        "latency_95": 221.66818940000002,
                        "latency_99": 223.12707279999998,
                        "latency_999": 223.50554278
                    },
                    "optimized": {
                        "nb_forwards": 73,
                        "throughput": 4.87,
                        "latency_mean": 207.50999308219178,
                        "latency_std": 27.815623148152877,
                        "latency_50": 189.774877,
                        "latency_90": 237.9391546,
                        "latency_95": 238.31236859999999,
                        "latency_99": 240.56186224,
                        "latency_999": 244.546144024
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 347,
                        "throughput": 23.13,
                        "latency_mean": 43.31934634870317,
                        "latency_std": 1.0549048838002049,
                        "latency_50": 43.200915,
                        "latency_90": 44.882578200000005,
                        "latency_95": 45.2033363,
                        "latency_99": 46.21059290000001,
                        "latency_999": 46.872020362
                    },
                    "optimized": {
                        "nb_forwards": 536,
                        "throughput": 35.73,
                        "latency_mean": 27.99766281716418,
                        "latency_std": 1.0529425569523703,
                        "latency_50": 27.69748,
                        "latency_90": 29.514874,
                        "latency_95": 30.11867,
                        "latency_99": 30.962504349999996,
                        "latency_999": 31.912116325000028
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 166,
                        "throughput": 11.07,
                        "latency_mean": 90.84950987349397,
                        "latency_std": 2.0682139308506917,
                        "latency_50": 90.8146185,
                        "latency_90": 93.020294,
                        "latency_95": 93.80372125,
                        "latency_99": 96.35470339999999,
                        "latency_999": 102.08768143500005
                    },
                    "optimized": {
                        "nb_forwards": 136,
                        "throughput": 9.07,
                        "latency_mean": 110.460723375,
                        "latency_std": 10.94678012157101,
                        "latency_50": 116.622233,
                        "latency_90": 117.13783,
                        "latency_95": 117.34992975,
                        "latency_99": 117.67875504999999,
                        "latency_999": 117.831356625
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 49,
                        "throughput": 3.27,
                        "latency_mean": 309.91095881632657,
                        "latency_std": 3.608767284403935,
                        "latency_50": 310.015937,
                        "latency_90": 313.87800439999995,
                        "latency_95": 314.33423439999996,
                        "latency_99": 318.4142126,
                        "latency_999": 321.07916876
                    },
                    "optimized": {
                        "nb_forwards": 42,
                        "throughput": 2.8,
                        "latency_mean": 357.87234047619046,
                        "latency_std": 12.591861806274563,
                        "latency_50": 355.861331,
                        "latency_90": 371.2001084,
                        "latency_95": 392.5037146499999,
                        "latency_99": 396.58451314,
                        "latency_999": 398.517758014
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 261,
                        "throughput": 17.4,
                        "latency_mean": 57.65446063601532,
                        "latency_std": 2.434519247994199,
                        "latency_50": 56.976535,
                        "latency_90": 60.541172,
                        "latency_95": 61.207556,
                        "latency_99": 68.33251279999983,
                        "latency_999": 73.13234924
                    },
                    "optimized": {
                        "nb_forwards": 259,
                        "throughput": 17.27,
                        "latency_mean": 57.94760827799228,
                        "latency_std": 5.454128073391148,
                        "latency_50": 62.715428,
                        "latency_90": 63.1070498,
                        "latency_95": 63.2122011,
                        "latency_99": 63.43313314,
                        "latency_999": 63.689983864000006
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 241,
                        "throughput": 16.07,
                        "latency_mean": 62.38362809958507,
                        "latency_std": 6.7586580892753,
                        "latency_50": 58.222043,
                        "latency_90": 69.869798,
                        "latency_95": 70.271116,
                        "latency_99": 71.7271214,
                        "latency_999": 80.2065777999999
                    },
                    "optimized": {
                        "nb_forwards": 280,
                        "throughput": 18.67,
                        "latency_mean": 53.64114180357142,
                        "latency_std": 5.084170546990271,
                        "latency_50": 50.8657445,
                        "latency_90": 59.753021700000005,
                        "latency_95": 59.83479735,
                        "latency_99": 59.98286641,
                        "latency_999": 60.134200166
                    }
                }
            ],
            "others": {
                "baseline": {
                    "exact_match": 82.3,
                    "f1": 87.2318519258519
                },
                "optimized": {
                    "exact_match": 76.9,
                    "f1": 83.01425661180923
                }
            }
        },
        "max_eval_samples": 1000,
        "time_benchmark_args": {
            "duration": 15,
            "warmup_runs": 5
        },
        "model_type": "distilbert"
    },
    {
        "model_name_or_path": "distilbert-base-uncased-distilled-squad",
        "task": "question-answering",
        "task_args": null,
        "dataset": {
            "path": "squad",
            "eval_split": "validation",
            "data_keys": {
                "question": "question",
                "context": "context"
            },
            "ref_keys": [
                "answers"
            ],
            "name": null,
            "calibration_split": "train"
        },
        "quantization_approach": "dynamic",
        "operators_to_quantize": [
            "Add",
            "MatMul"
        ],
        "node_exclusion": [
            "layernorm",
            "gelu",
            "residual",
            "gather",
            "softmax"
        ],
        "aware_training": false,
        "per_channel": false,
        "calibration": {
            "method": "minmax",
            "num_calibration_samples": 100,
            "calibration_histogram_percentile": null,
            "calibration_moving_average": null,
            "calibration_moving_average_constant": null
        },
        "framework": "onnxruntime",
        "framework_args": {
            "opset": 11,
            "optimization_level": 1
        },
        "hardware": "Architecture:                    x86_64\nCPU op-mode(s):                  32-bit, 64-bit\nByte Order:                      Little Endian\nAddress sizes:                   46 bits physical, 48 bits virtual\nCPU(s):                          8\nOn-line CPU(s) list:             0-7\nThread(s) per core:              2\nCore(s) per socket:              4\nSocket(s):                       1\nNUMA node(s):                    1\nVendor ID:                       GenuineIntel\nCPU family:                      6\nModel:                           85\nModel name:                      Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping:                        7\nCPU MHz:                         3099.977\nBogoMIPS:                        4999.99\nHypervisor vendor:               KVM\nVirtualization type:             full\nL1d cache:                       128 KiB\nL1i cache:                       128 KiB\nL2 cache:                        4 MiB\nL3 cache:                        35.8 MiB\nNUMA node0 CPU(s):               0-7\nVulnerability Itlb multihit:     KVM: Vulnerable\nVulnerability L1tf:              Mitigation; PTE Inversion\nVulnerability Mds:               Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown:          Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1:        Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2:        Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds:             Not affected\nVulnerability Tsx async abort:   Not affected\nFlags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
        "versions": {
            "transformers": "4.20.1",
            "optimum": "1.2.3.dev0",
            "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
        },
        "evaluation": {
            "time": [
                {
                    "batch_size": 1,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 205,
                        "throughput": 13.67,
                        "latency_mean": 73.39433734146341,
                        "latency_std": 1.0224081759969428,
                        "latency_50": 73.460104,
                        "latency_90": 74.518562,
                        "latency_95": 75.0664468,
                        "latency_99": 75.84203632,
                        "latency_999": 76.921990512
                    },
                    "optimized": {
                        "nb_forwards": 565,
                        "throughput": 37.67,
                        "latency_mean": 26.564435938053098,
                        "latency_std": 0.187471490836448,
                        "latency_50": 26.533942,
                        "latency_90": 26.71886,
                        "latency_95": 26.8078146,
                        "latency_99": 27.12104648,
                        "latency_999": 28.311193836000076
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 88,
                        "throughput": 5.87,
                        "latency_mean": 172.14481681818182,
                        "latency_std": 1.6925453073042733,
                        "latency_50": 172.1907685,
                        "latency_90": 174.1415275,
                        "latency_95": 175.0375522,
                        "latency_99": 175.82339321,
                        "latency_999": 175.940543321
                    },
                    "optimized": {
                        "nb_forwards": 159,
                        "throughput": 10.6,
                        "latency_mean": 94.77951160377359,
                        "latency_std": 4.814673654949999,
                        "latency_50": 96.147419,
                        "latency_90": 97.289479,
                        "latency_95": 97.7011618,
                        "latency_99": 98.90126592,
                        "latency_999": 99.86395080400003
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 314,
                        "throughput": 20.93,
                        "latency_mean": 47.86938606687898,
                        "latency_std": 0.5732448823818431,
                        "latency_50": 47.92357,
                        "latency_90": 48.562472299999996,
                        "latency_95": 48.774001,
                        "latency_99": 49.11157929,
                        "latency_999": 49.426655882
                    },
                    "optimized": {
                        "nb_forwards": 2076,
                        "throughput": 138.4,
                        "latency_mean": 7.228697234585741,
                        "latency_std": 0.31178062817765007,
                        "latency_50": 7.1645175,
                        "latency_90": 7.7391565,
                        "latency_95": 7.850395,
                        "latency_99": 7.99090625,
                        "latency_999": 8.272048450000009
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 124,
                        "throughput": 8.27,
                        "latency_mean": 121.20055792741935,
                        "latency_std": 0.36103071773349715,
                        "latency_50": 121.1512925,
                        "latency_90": 121.64469659999999,
                        "latency_95": 121.8583837,
                        "latency_99": 121.94945611,
                        "latency_999": 122.865046776
                    },
                    "optimized": {
                        "nb_forwards": 374,
                        "throughput": 24.93,
                        "latency_mean": 40.12498197593583,
                        "latency_std": 1.289681830262187,
                        "latency_50": 39.74041,
                        "latency_90": 42.1230914,
                        "latency_95": 42.6942433,
                        "latency_99": 43.790769559999994,
                        "latency_999": 44.761699371
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 37,
                        "throughput": 2.47,
                        "latency_mean": 407.8962844324324,
                        "latency_std": 1.0450568064054995,
                        "latency_50": 407.71793,
                        "latency_90": 409.2667942,
                        "latency_95": 409.5479284,
                        "latency_99": 410.92905384,
                        "latency_999": 411.387726384
                    },
                    "optimized": {
                        "nb_forwards": 100,
                        "throughput": 6.67,
                        "latency_mean": 151.48524175,
                        "latency_std": 2.974018003966197,
                        "latency_50": 151.4110505,
                        "latency_90": 155.56673659999998,
                        "latency_95": 156.83145130000003,
                        "latency_99": 159.26098265000002,
                        "latency_999": 159.893205065
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 209,
                        "throughput": 13.93,
                        "latency_mean": 72.10892416267941,
                        "latency_std": 0.9442605052158691,
                        "latency_50": 72.232233,
                        "latency_90": 73.24587679999999,
                        "latency_95": 73.5688604,
                        "latency_99": 74.49961544,
                        "latency_999": 74.99024779999999
                    },
                    "optimized": {
                        "nb_forwards": 689,
                        "throughput": 45.93,
                        "latency_mean": 21.802329773584905,
                        "latency_std": 1.2356400802889618,
                        "latency_50": 21.462006,
                        "latency_90": 23.394015399999997,
                        "latency_95": 23.461499,
                        "latency_99": 23.60606868,
                        "latency_999": 23.790571160000002
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 134,
                        "throughput": 8.93,
                        "latency_mean": 112.67185172388061,
                        "latency_std": 9.561218465080065,
                        "latency_50": 117.305159,
                        "latency_90": 118.676979,
                        "latency_95": 119.01818645,
                        "latency_99": 119.38788219,
                        "latency_999": 119.960128779
                    },
                    "optimized": {
                        "nb_forwards": 347,
                        "throughput": 23.13,
                        "latency_mean": 43.26179646974064,
                        "latency_std": 0.18815052466746143,
                        "latency_50": 43.253195,
                        "latency_90": 43.494735799999994,
                        "latency_95": 43.5794423,
                        "latency_99": 43.73514478,
                        "latency_999": 44.139542132
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 251,
                        "throughput": 16.73,
                        "latency_mean": 59.92467478486056,
                        "latency_std": 0.5304618192835174,
                        "latency_50": 59.959358,
                        "latency_90": 60.55806,
                        "latency_95": 60.8509665,
                        "latency_99": 61.1526405,
                        "latency_999": 61.21307125
                    },
                    "optimized": {
                        "nb_forwards": 1191,
                        "throughput": 79.4,
                        "latency_mean": 12.6018126565911,
                        "latency_std": 0.417689097037675,
                        "latency_50": 12.531897,
                        "latency_90": 13.148671,
                        "latency_95": 13.431785,
                        "latency_99": 13.783848599999999,
                        "latency_999": 14.088670549999993
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 70,
                        "throughput": 4.67,
                        "latency_mean": 215.0252773857143,
                        "latency_std": 8.088753831054484,
                        "latency_50": 216.4045865,
                        "latency_90": 218.0282024,
                        "latency_95": 218.17546385,
                        "latency_99": 219.1229224,
                        "latency_999": 219.88093984
                    },
                    "optimized": {
                        "nb_forwards": 193,
                        "throughput": 12.87,
                        "latency_mean": 78.03187068393781,
                        "latency_std": 5.702629213049006,
                        "latency_50": 75.158498,
                        "latency_90": 88.1716502,
                        "latency_95": 88.3947608,
                        "latency_99": 88.80716235999999,
                        "latency_999": 89.147765224
                    }
                }
            ],
            "others": {
                "baseline": {
                    "exact_match": 82.3,
                    "f1": 87.2318519258519
                },
                "optimized": {
                    "exact_match": 80.6,
                    "f1": 86.09652042402038
                }
            }
        },
        "max_eval_samples": 1000,
        "time_benchmark_args": {
            "duration": 15,
            "warmup_runs": 5
        },
        "model_type": "distilbert"
    },
    {
        "model_name_or_path": "distilbert-base-uncased-distilled-squad",
        "task": "question-answering",
        "task_args": null,
        "dataset": {
            "path": "squad",
            "eval_split": "validation",
            "data_keys": {
                "question": "question",
                "context": "context"
            },
            "ref_keys": [
                "answers"
            ],
            "name": null,
            "calibration_split": "train"
        },
        "quantization_approach": "dynamic",
        "operators_to_quantize": [
            "Add",
            "MatMul"
        ],
        "node_exclusion": [],
        "aware_training": false,
        "per_channel": false,
        "calibration": {
            "method": "minmax",
            "num_calibration_samples": 100,
            "calibration_histogram_percentile": null,
            "calibration_moving_average": null,
            "calibration_moving_average_constant": null
        },
        "framework": "onnxruntime",
        "framework_args": {
            "opset": 11,
            "optimization_level": 1
        },
        "hardware": "Architecture:                    x86_64\nCPU op-mode(s):                  32-bit, 64-bit\nByte Order:                      Little Endian\nAddress sizes:                   46 bits physical, 48 bits virtual\nCPU(s):                          8\nOn-line CPU(s) list:             0-7\nThread(s) per core:              2\nCore(s) per socket:              4\nSocket(s):                       1\nNUMA node(s):                    1\nVendor ID:                       GenuineIntel\nCPU family:                      6\nModel:                           85\nModel name:                      Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping:                        7\nCPU MHz:                         3100.086\nBogoMIPS:                        4999.99\nHypervisor vendor:               KVM\nVirtualization type:             full\nL1d cache:                       128 KiB\nL1i cache:                       128 KiB\nL2 cache:                        4 MiB\nL3 cache:                        35.8 MiB\nNUMA node0 CPU(s):               0-7\nVulnerability Itlb multihit:     KVM: Vulnerable\nVulnerability L1tf:              Mitigation; PTE Inversion\nVulnerability Mds:               Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown:          Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1:        Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2:        Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds:             Not affected\nVulnerability Tsx async abort:   Not affected\nFlags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
        "versions": {
            "transformers": "4.20.1",
            "optimum": "1.2.3.dev0",
            "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
        },
        "evaluation": {
            "time": [
                {
                    "batch_size": 8,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 70,
                        "throughput": 4.67,
                        "latency_mean": 214.7585842714286,
                        "latency_std": 1.0748282952739399,
                        "latency_50": 214.6217865,
                        "latency_90": 216.33038530000002,
                        "latency_95": 216.71350644999998,
                        "latency_99": 217.10804912999998,
                        "latency_999": 217.27182111300002
                    },
                    "optimized": {
                        "nb_forwards": 173,
                        "throughput": 11.53,
                        "latency_mean": 87.19321883815029,
                        "latency_std": 0.4127804706511856,
                        "latency_50": 87.115246,
                        "latency_90": 87.827362,
                        "latency_95": 87.9646276,
                        "latency_99": 88.20961412,
                        "latency_999": 88.528557128
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 69,
                        "throughput": 4.6,
                        "latency_mean": 220.37778250724637,
                        "latency_std": 0.6723770956181739,
                        "latency_50": 220.31347,
                        "latency_90": 221.0006854,
                        "latency_95": 221.6680164,
                        "latency_99": 222.58888876,
                        "latency_999": 222.634509076
                    },
                    "optimized": {
                        "nb_forwards": 179,
                        "throughput": 11.93,
                        "latency_mean": 84.18192030726257,
                        "latency_std": 1.7207141010005933,
                        "latency_50": 84.029571,
                        "latency_90": 86.4943866,
                        "latency_95": 87.52624759999999,
                        "latency_99": 88.34396318,
                        "latency_999": 89.769148814
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 165,
                        "throughput": 11.0,
                        "latency_mean": 90.97119612121213,
                        "latency_std": 1.6585097335652936,
                        "latency_50": 90.91036,
                        "latency_90": 93.046208,
                        "latency_95": 93.5801342,
                        "latency_99": 95.02802792,
                        "latency_999": 95.151213372
                    },
                    "optimized": {
                        "nb_forwards": 362,
                        "throughput": 24.13,
                        "latency_mean": 41.510697558011046,
                        "latency_std": 2.6850168617333043,
                        "latency_50": 40.5225655,
                        "latency_90": 45.749891299999994,
                        "latency_95": 45.84510315,
                        "latency_99": 46.05479407,
                        "latency_999": 46.304120276
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 157,
                        "throughput": 10.47,
                        "latency_mean": 95.77657172611464,
                        "latency_std": 10.384333636895294,
                        "latency_50": 91.24492,
                        "latency_90": 118.03857640000001,
                        "latency_95": 119.1807458,
                        "latency_99": 120.53466472,
                        "latency_999": 121.96483704799999
                    },
                    "optimized": {
                        "nb_forwards": 369,
                        "throughput": 24.6,
                        "latency_mean": 40.65512325745258,
                        "latency_std": 2.8634757589258784,
                        "latency_50": 42.886645,
                        "latency_90": 43.2775952,
                        "latency_95": 43.3398654,
                        "latency_99": 43.578850839999994,
                        "latency_999": 43.896356176
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 37,
                        "throughput": 2.47,
                        "latency_mean": 407.3405624594595,
                        "latency_std": 0.9446139169585147,
                        "latency_50": 407.370805,
                        "latency_90": 408.153985,
                        "latency_95": 408.5356864,
                        "latency_99": 410.3574352,
                        "latency_999": 411.19967482
                    },
                    "optimized": {
                        "nb_forwards": 98,
                        "throughput": 6.53,
                        "latency_mean": 154.55170329591837,
                        "latency_std": 3.2466049439500617,
                        "latency_50": 153.957612,
                        "latency_90": 159.2587284,
                        "latency_95": 159.9041657,
                        "latency_99": 163.49634651,
                        "latency_999": 165.108355851
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 206,
                        "throughput": 13.73,
                        "latency_mean": 73.15383589805825,
                        "latency_std": 1.109006864241087,
                        "latency_50": 72.793732,
                        "latency_90": 75.0387975,
                        "latency_95": 75.40630425,
                        "latency_99": 76.2248304,
                        "latency_999": 80.54231973499994
                    },
                    "optimized": {
                        "nb_forwards": 725,
                        "throughput": 48.33,
                        "latency_mean": 20.701377126896553,
                        "latency_std": 0.5521964132279599,
                        "latency_50": 20.611305,
                        "latency_90": 21.5047578,
                        "latency_95": 21.702541800000002,
                        "latency_99": 22.15760376,
                        "latency_999": 24.283321767999993
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 312,
                        "throughput": 20.8,
                        "latency_mean": 48.10309290064102,
                        "latency_std": 0.5452673615135677,
                        "latency_50": 48.1016005,
                        "latency_90": 48.788028,
                        "latency_95": 48.96113875,
                        "latency_99": 49.29684296,
                        "latency_999": 49.564814242
                    },
                    "optimized": {
                        "nb_forwards": 2102,
                        "throughput": 140.13,
                        "latency_mean": 7.138094656517603,
                        "latency_std": 0.28361271957621664,
                        "latency_50": 7.1017695,
                        "latency_90": 7.5524614,
                        "latency_95": 7.751003399999999,
                        "latency_99": 7.9702024399999996,
                        "latency_999": 8.201174685999996
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 252,
                        "throughput": 16.8,
                        "latency_mean": 59.635192896825394,
                        "latency_std": 0.5367633151420198,
                        "latency_50": 59.6327055,
                        "latency_90": 60.299356,
                        "latency_95": 60.47347955,
                        "latency_99": 60.860342730000006,
                        "latency_999": 60.983657111
                    },
                    "optimized": {
                        "nb_forwards": 1132,
                        "throughput": 75.47,
                        "latency_mean": 13.252757100706713,
                        "latency_std": 0.7118720708722511,
                        "latency_50": 13.7066955,
                        "latency_90": 13.920642699999998,
                        "latency_95": 13.9857479,
                        "latency_99": 14.104317450000002,
                        "latency_999": 14.310801054999994
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 261,
                        "throughput": 17.4,
                        "latency_mean": 57.64333686206896,
                        "latency_std": 1.0745535083532258,
                        "latency_50": 57.643633,
                        "latency_90": 59.099249,
                        "latency_95": 59.402868,
                        "latency_99": 59.834367,
                        "latency_999": 60.372973120000005
                    },
                    "optimized": {
                        "nb_forwards": 641,
                        "throughput": 42.73,
                        "latency_mean": 23.416976716068643,
                        "latency_std": 0.819280052981588,
                        "latency_50": 23.204464,
                        "latency_90": 24.644357,
                        "latency_95": 24.972543,
                        "latency_99": 25.292027,
                        "latency_999": 26.127226000000014
                    }
                }
            ],
            "others": {
                "baseline": {
                    "exact_match": 82.3,
                    "f1": 87.2318519258519
                },
                "optimized": {
                    "exact_match": 80.6,
                    "f1": 86.09652042402038
                }
            }
        },
        "max_eval_samples": 1000,
        "time_benchmark_args": {
            "duration": 15,
            "warmup_runs": 5
        },
        "model_type": "distilbert"
    },
    {
        "model_name_or_path": "distilbert-base-uncased-distilled-squad",
        "task": "question-answering",
        "task_args": null,
        "dataset": {
            "path": "squad",
            "eval_split": "validation",
            "data_keys": {
                "question": "question",
                "context": "context"
            },
            "ref_keys": [
                "answers"
            ],
            "name": null,
            "calibration_split": "train"
        },
        "quantization_approach": "dynamic",
        "operators_to_quantize": [
            "Add"
        ],
        "node_exclusion": [],
        "aware_training": false,
        "per_channel": false,
        "calibration": {
            "method": "minmax",
            "num_calibration_samples": 100,
            "calibration_histogram_percentile": null,
            "calibration_moving_average": null,
            "calibration_moving_average_constant": null
        },
        "framework": "onnxruntime",
        "framework_args": {
            "opset": 11,
            "optimization_level": 1
        },
        "hardware": "Architecture:                    x86_64\nCPU op-mode(s):                  32-bit, 64-bit\nByte Order:                      Little Endian\nAddress sizes:                   46 bits physical, 48 bits virtual\nCPU(s):                          8\nOn-line CPU(s) list:             0-7\nThread(s) per core:              2\nCore(s) per socket:              4\nSocket(s):                       1\nNUMA node(s):                    1\nVendor ID:                       GenuineIntel\nCPU family:                      6\nModel:                           85\nModel name:                      Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping:                        7\nCPU MHz:                         3099.804\nBogoMIPS:                        4999.99\nHypervisor vendor:               KVM\nVirtualization type:             full\nL1d cache:                       128 KiB\nL1i cache:                       128 KiB\nL2 cache:                        4 MiB\nL3 cache:                        35.8 MiB\nNUMA node0 CPU(s):               0-7\nVulnerability Itlb multihit:     KVM: Vulnerable\nVulnerability L1tf:              Mitigation; PTE Inversion\nVulnerability Mds:               Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown:          Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1:        Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2:        Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds:             Not affected\nVulnerability Tsx async abort:   Not affected\nFlags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
        "versions": {
            "transformers": "4.20.1",
            "optimum": "1.2.3.dev0",
            "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
        },
        "evaluation": {
            "time": [
                {
                    "batch_size": 4,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 74,
                        "throughput": 4.93,
                        "latency_mean": 203.90076075675674,
                        "latency_std": 21.596870608446135,
                        "latency_50": 219.806601,
                        "latency_90": 222.28046669999998,
                        "latency_95": 222.6357457,
                        "latency_99": 223.52929084000002,
                        "latency_999": 223.571662084
                    },
                    "optimized": {
                        "nb_forwards": 86,
                        "throughput": 5.73,
                        "latency_mean": 175.15544305813953,
                        "latency_std": 18.874085885764828,
                        "latency_50": 167.724299,
                        "latency_90": 218.6726605,
                        "latency_95": 218.96398125,
                        "latency_99": 219.63268945,
                        "latency_999": 219.994796845
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 252,
                        "throughput": 16.8,
                        "latency_mean": 59.620304126984124,
                        "latency_std": 0.9369837443196661,
                        "latency_50": 59.6408835,
                        "latency_90": 60.899260700000006,
                        "latency_95": 61.2446919,
                        "latency_99": 61.72324802,
                        "latency_999": 62.219581975000004
                    },
                    "optimized": {
                        "nb_forwards": 509,
                        "throughput": 33.93,
                        "latency_mean": 29.505452408644402,
                        "latency_std": 0.10477107837580692,
                        "latency_50": 29.494157,
                        "latency_90": 29.626543,
                        "latency_95": 29.6817152,
                        "latency_99": 29.80756116,
                        "latency_999": 30.165765552000007
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 127,
                        "throughput": 8.47,
                        "latency_mean": 118.58148563779527,
                        "latency_std": 0.4419650117831178,
                        "latency_50": 118.534868,
                        "latency_90": 119.0686048,
                        "latency_95": 119.2944373,
                        "latency_99": 120.15261722,
                        "latency_999": 120.63352342200001
                    },
                    "optimized": {
                        "nb_forwards": 141,
                        "throughput": 9.4,
                        "latency_mean": 106.55073221985815,
                        "latency_std": 0.3944079713253884,
                        "latency_50": 106.498131,
                        "latency_90": 106.921763,
                        "latency_95": 107.019295,
                        "latency_99": 107.93368059999999,
                        "latency_999": 109.45881622000002
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 440,
                        "throughput": 29.33,
                        "latency_mean": 34.126477825,
                        "latency_std": 1.0283869127760359,
                        "latency_50": 34.086549,
                        "latency_90": 35.5103139,
                        "latency_95": 35.82743535,
                        "latency_99": 36.42215259,
                        "latency_999": 36.908125651999995
                    },
                    "optimized": {
                        "nb_forwards": 882,
                        "throughput": 58.8,
                        "latency_mean": 17.017291861678004,
                        "latency_std": 0.08720510862403949,
                        "latency_50": 17.0091915,
                        "latency_90": 17.1355021,
                        "latency_95": 17.17376905,
                        "latency_99": 17.25574468,
                        "latency_999": 17.338018466
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 168,
                        "throughput": 11.2,
                        "latency_mean": 89.80876792857143,
                        "latency_std": 2.724045354234363,
                        "latency_50": 89.23447,
                        "latency_90": 92.91518060000001,
                        "latency_95": 95.965778,
                        "latency_99": 99.1532608,
                        "latency_999": 100.916031267
                    },
                    "optimized": {
                        "nb_forwards": 165,
                        "throughput": 11.0,
                        "latency_mean": 91.00189352121211,
                        "latency_std": 11.013642722583302,
                        "latency_50": 85.592419,
                        "latency_90": 104.2900912,
                        "latency_95": 104.361947,
                        "latency_99": 104.4810248,
                        "latency_999": 104.49576398800001
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 74,
                        "throughput": 4.93,
                        "latency_mean": 204.2916795945946,
                        "latency_std": 20.441719346331414,
                        "latency_50": 214.991102,
                        "latency_90": 216.5581006,
                        "latency_95": 217.0407147,
                        "latency_99": 217.41635044999998,
                        "latency_999": 217.686203345
                    },
                    "optimized": {
                        "nb_forwards": 71,
                        "throughput": 4.73,
                        "latency_mean": 212.32542294366195,
                        "latency_std": 0.531422234525691,
                        "latency_50": 212.331527,
                        "latency_90": 212.977908,
                        "latency_95": 213.082789,
                        "latency_99": 213.5128688,
                        "latency_999": 214.10746658000002
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 49,
                        "throughput": 3.27,
                        "latency_mean": 309.52593997959184,
                        "latency_std": 3.142891825034627,
                        "latency_50": 309.382131,
                        "latency_90": 314.37877760000003,
                        "latency_95": 315.6069922,
                        "latency_99": 315.9048882,
                        "latency_999": 315.94479852
                    },
                    "optimized": {
                        "nb_forwards": 34,
                        "throughput": 2.27,
                        "latency_mean": 445.23998747058823,
                        "latency_std": 1.320393415283763,
                        "latency_50": 445.3137435,
                        "latency_90": 446.8903998,
                        "latency_95": 447.35347025,
                        "latency_99": 447.77492344999996,
                        "latency_999": 447.88785324500003
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 268,
                        "throughput": 17.87,
                        "latency_mean": 55.97453225746269,
                        "latency_std": 1.0305715235126534,
                        "latency_50": 55.880397,
                        "latency_90": 57.251859200000006,
                        "latency_95": 57.9394124,
                        "latency_99": 58.42131721,
                        "latency_999": 58.653711982
                    },
                    "optimized": {
                        "nb_forwards": 280,
                        "throughput": 18.67,
                        "latency_mean": 53.5968861,
                        "latency_std": 0.09504994641745691,
                        "latency_50": 53.589719,
                        "latency_90": 53.727543700000005,
                        "latency_95": 53.763608700000006,
                        "latency_99": 53.84968344,
                        "latency_999": 53.900073552
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 207,
                        "throughput": 13.8,
                        "latency_mean": 72.80511813526569,
                        "latency_std": 0.28605904617544076,
                        "latency_50": 72.792014,
                        "latency_90": 73.14956740000001,
                        "latency_95": 73.2242503,
                        "latency_99": 73.44227836,
                        "latency_999": 74.268179328
                    },
                    "optimized": {
                        "nb_forwards": 263,
                        "throughput": 17.53,
                        "latency_mean": 57.04931692775665,
                        "latency_std": 0.1852668604685806,
                        "latency_50": 57.008233,
                        "latency_90": 57.251367200000004,
                        "latency_95": 57.3371062,
                        "latency_99": 57.79880592,
                        "latency_999": 58.027793358000004
                    }
                }
            ],
            "others": {
                "baseline": {
                    "exact_match": 82.3,
                    "f1": 87.2318519258519
                },
                "optimized": {
                    "exact_match": 82.3,
                    "f1": 87.2318519258519
                }
            }
        },
        "max_eval_samples": 1000,
        "time_benchmark_args": {
            "duration": 15,
            "warmup_runs": 5
        },
        "model_type": "distilbert"
    },
    {
        "model_name_or_path": "distilbert-base-uncased-distilled-squad",
        "task": "question-answering",
        "task_args": null,
        "dataset": {
            "path": "squad",
            "eval_split": "validation",
            "data_keys": {
                "question": "question",
                "context": "context"
            },
            "ref_keys": [
                "answers"
            ],
            "name": null,
            "calibration_split": "train"
        },
        "quantization_approach": "static",
        "operators_to_quantize": [
            "Add"
        ],
        "node_exclusion": [],
        "aware_training": false,
        "per_channel": false,
        "calibration": {
            "method": "minmax",
            "num_calibration_samples": 100,
            "calibration_histogram_percentile": null,
            "calibration_moving_average": null,
            "calibration_moving_average_constant": null
        },
        "framework": "onnxruntime",
        "framework_args": {
            "opset": 11,
            "optimization_level": 1
        },
        "hardware": "Architecture:                    x86_64\nCPU op-mode(s):                  32-bit, 64-bit\nByte Order:                      Little Endian\nAddress sizes:                   46 bits physical, 48 bits virtual\nCPU(s):                          8\nOn-line CPU(s) list:             0-7\nThread(s) per core:              2\nCore(s) per socket:              4\nSocket(s):                       1\nNUMA node(s):                    1\nVendor ID:                       GenuineIntel\nCPU family:                      6\nModel:                           85\nModel name:                      Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping:                        7\nCPU MHz:                         3099.838\nBogoMIPS:                        4999.99\nHypervisor vendor:               KVM\nVirtualization type:             full\nL1d cache:                       128 KiB\nL1i cache:                       128 KiB\nL2 cache:                        4 MiB\nL3 cache:                        35.8 MiB\nNUMA node0 CPU(s):               0-7\nVulnerability Itlb multihit:     KVM: Vulnerable\nVulnerability L1tf:              Mitigation; PTE Inversion\nVulnerability Mds:               Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown:          Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1:        Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2:        Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds:             Not affected\nVulnerability Tsx async abort:   Not affected\nFlags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
        "versions": {
            "transformers": "4.20.1",
            "optimum": "1.2.3.dev0",
            "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
        },
        "evaluation": {
            "time": [
                {
                    "batch_size": 1,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 340,
                        "throughput": 22.67,
                        "latency_mean": 44.18998163235294,
                        "latency_std": 1.4697810745421789,
                        "latency_50": 43.944576,
                        "latency_90": 46.3169695,
                        "latency_95": 46.85474685,
                        "latency_99": 47.600308,
                        "latency_999": 48.253145861
                    },
                    "optimized": {
                        "nb_forwards": 459,
                        "throughput": 30.6,
                        "latency_mean": 32.72214506100218,
                        "latency_std": 1.383428951575046,
                        "latency_50": 32.429874,
                        "latency_90": 34.6945812,
                        "latency_95": 35.1220015,
                        "latency_99": 36.2150666,
                        "latency_999": 36.859092831999995
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 68,
                        "throughput": 4.53,
                        "latency_mean": 221.9408882647059,
                        "latency_std": 1.4998283620039232,
                        "latency_50": 221.727837,
                        "latency_90": 223.742314,
                        "latency_95": 224.84834684999998,
                        "latency_99": 226.48472676999998,
                        "latency_999": 227.941314877
                    },
                    "optimized": {
                        "nb_forwards": 61,
                        "throughput": 4.07,
                        "latency_mean": 246.87498426229507,
                        "latency_std": 28.403201366430647,
                        "latency_50": 265.371572,
                        "latency_90": 266.689959,
                        "latency_95": 266.835504,
                        "latency_99": 267.98315759999997,
                        "latency_999": 268.11036216
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 310,
                        "throughput": 20.67,
                        "latency_mean": 48.538128174193545,
                        "latency_std": 1.0825802524617727,
                        "latency_50": 48.426445,
                        "latency_90": 49.9468469,
                        "latency_95": 50.608303299999996,
                        "latency_99": 51.1435016,
                        "latency_999": 51.579964874
                    },
                    "optimized": {
                        "nb_forwards": 709,
                        "throughput": 47.27,
                        "latency_mean": 21.175880187588152,
                        "latency_std": 1.3509200916441064,
                        "latency_50": 21.880383,
                        "latency_90": 22.094095199999998,
                        "latency_95": 22.1335476,
                        "latency_99": 22.22487572,
                        "latency_999": 22.332830584000003
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 250,
                        "throughput": 16.67,
                        "latency_mean": 60.190009684,
                        "latency_std": 6.476253661495298,
                        "latency_50": 56.6032025,
                        "latency_90": 70.51879790000001,
                        "latency_95": 71.00682115,
                        "latency_99": 71.97614781,
                        "latency_999": 72.419652896
                    },
                    "optimized": {
                        "nb_forwards": 223,
                        "throughput": 14.87,
                        "latency_mean": 67.2927178340807,
                        "latency_std": 0.144766654312015,
                        "latency_50": 67.282017,
                        "latency_90": 67.4824698,
                        "latency_95": 67.53632420000001,
                        "latency_99": 67.65417208,
                        "latency_999": 67.74949128
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 70,
                        "throughput": 4.67,
                        "latency_mean": 214.69050597142856,
                        "latency_std": 0.4979447468736716,
                        "latency_50": 214.625579,
                        "latency_90": 215.30233869999998,
                        "latency_95": 215.40087115,
                        "latency_99": 216.26991978,
                        "latency_999": 216.828470778
                    },
                    "optimized": {
                        "nb_forwards": 72,
                        "throughput": 4.8,
                        "latency_mean": 209.0101843472222,
                        "latency_std": 23.570857468465338,
                        "latency_50": 199.064305,
                        "latency_90": 255.36236,
                        "latency_95": 255.8589584,
                        "latency_99": 256.32438673,
                        "latency_999": 256.494448273
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 210,
                        "throughput": 14.0,
                        "latency_mean": 71.66073837142856,
                        "latency_std": 0.48116978977616937,
                        "latency_50": 71.5734205,
                        "latency_90": 72.0439302,
                        "latency_95": 72.4196435,
                        "latency_99": 73.94004301999999,
                        "latency_999": 74.513449128
                    },
                    "optimized": {
                        "nb_forwards": 256,
                        "throughput": 17.07,
                        "latency_mean": 58.67059760546875,
                        "latency_std": 1.692148816240866,
                        "latency_50": 58.4892805,
                        "latency_90": 60.9907035,
                        "latency_95": 61.989116,
                        "latency_99": 62.95026975,
                        "latency_999": 63.22841489
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 49,
                        "throughput": 3.27,
                        "latency_mean": 310.00153851020406,
                        "latency_std": 2.7851646055699177,
                        "latency_50": 309.754002,
                        "latency_90": 313.0454142,
                        "latency_95": 314.881432,
                        "latency_99": 317.12773244,
                        "latency_999": 317.61191674400004
                    },
                    "optimized": {
                        "nb_forwards": 37,
                        "throughput": 2.47,
                        "latency_mean": 406.5427697027027,
                        "latency_std": 6.185204572502198,
                        "latency_50": 406.487553,
                        "latency_90": 414.2124184,
                        "latency_95": 417.1470922,
                        "latency_99": 419.61633636,
                        "latency_999": 419.968637436
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 125,
                        "throughput": 8.33,
                        "latency_mean": 120.571896528,
                        "latency_std": 0.47277616561376834,
                        "latency_50": 120.49457,
                        "latency_90": 121.2047576,
                        "latency_95": 121.5551152,
                        "latency_99": 121.83896872,
                        "latency_999": 122.05622189200001
                    },
                    "optimized": {
                        "nb_forwards": 145,
                        "throughput": 9.67,
                        "latency_mean": 103.6226343724138,
                        "latency_std": 2.9157268721681353,
                        "latency_50": 103.111948,
                        "latency_90": 107.64045859999999,
                        "latency_95": 108.98621659999999,
                        "latency_99": 112.292974,
                        "latency_999": 112.668847504
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 126,
                        "throughput": 8.4,
                        "latency_mean": 119.27741188888889,
                        "latency_std": 0.8764285887799853,
                        "latency_50": 119.307692,
                        "latency_90": 120.473393,
                        "latency_95": 120.7184835,
                        "latency_99": 121.2451975,
                        "latency_999": 121.262551875
                    },
                    "optimized": {
                        "nb_forwards": 118,
                        "throughput": 7.87,
                        "latency_mean": 127.78634824576271,
                        "latency_std": 0.3648261747760306,
                        "latency_50": 127.7387245,
                        "latency_90": 128.2727152,
                        "latency_95": 128.37564195000002,
                        "latency_99": 128.60643422,
                        "latency_999": 129.373907065
                    }
                }
            ],
            "others": {
                "baseline": {
                    "exact_match": 82.3,
                    "f1": 87.2318519258519
                },
                "optimized": {
                    "exact_match": 59.8,
                    "f1": 69.2166994698964
                }
            }
        },
        "max_eval_samples": 1000,
        "time_benchmark_args": {
            "duration": 15,
            "warmup_runs": 5
        },
        "model_type": "distilbert"
    },
    {
        "model_name_or_path": "distilbert-base-uncased-distilled-squad",
        "task": "question-answering",
        "task_args": null,
        "dataset": {
            "path": "squad",
            "eval_split": "validation",
            "data_keys": {
                "question": "question",
                "context": "context"
            },
            "ref_keys": [
                "answers"
            ],
            "name": null,
            "calibration_split": "train"
        },
        "quantization_approach": "static",
        "operators_to_quantize": [
            "Add",
            "MatMul"
        ],
        "node_exclusion": [],
        "aware_training": false,
        "per_channel": false,
        "calibration": {
            "method": "minmax",
            "num_calibration_samples": 100,
            "calibration_histogram_percentile": null,
            "calibration_moving_average": null,
            "calibration_moving_average_constant": null
        },
        "framework": "onnxruntime",
        "framework_args": {
            "opset": 11,
            "optimization_level": 1
        },
        "hardware": "Architecture:                    x86_64\nCPU op-mode(s):                  32-bit, 64-bit\nByte Order:                      Little Endian\nAddress sizes:                   46 bits physical, 48 bits virtual\nCPU(s):                          8\nOn-line CPU(s) list:             0-7\nThread(s) per core:              2\nCore(s) per socket:              4\nSocket(s):                       1\nNUMA node(s):                    1\nVendor ID:                       GenuineIntel\nCPU family:                      6\nModel:                           85\nModel name:                      Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping:                        7\nCPU MHz:                         3097.368\nBogoMIPS:                        4999.99\nHypervisor vendor:               KVM\nVirtualization type:             full\nL1d cache:                       128 KiB\nL1i cache:                       128 KiB\nL2 cache:                        4 MiB\nL3 cache:                        35.8 MiB\nNUMA node0 CPU(s):               0-7\nVulnerability Itlb multihit:     KVM: Vulnerable\nVulnerability L1tf:              Mitigation; PTE Inversion\nVulnerability Mds:               Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown:          Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1:        Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2:        Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds:             Not affected\nVulnerability Tsx async abort:   Not affected\nFlags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
        "versions": {
            "transformers": "4.20.1",
            "optimum": "1.2.3.dev0",
            "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
        },
        "evaluation": {
            "time": [
                {
                    "batch_size": 1,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 311,
                        "throughput": 20.73,
                        "latency_mean": 48.27436349517685,
                        "latency_std": 1.040822281053826,
                        "latency_50": 48.187545,
                        "latency_90": 49.621862,
                        "latency_95": 50.0009575,
                        "latency_99": 50.911865399999996,
                        "latency_999": 51.49017933
                    },
                    "optimized": {
                        "nb_forwards": 1292,
                        "throughput": 86.13,
                        "latency_mean": 11.618419383126936,
                        "latency_std": 0.10444681939743777,
                        "latency_50": 11.602171,
                        "latency_90": 11.748442599999999,
                        "latency_95": 11.79717875,
                        "latency_99": 11.94069171,
                        "latency_999": 12.056363528000002
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 69,
                        "throughput": 4.6,
                        "latency_mean": 220.32118042028983,
                        "latency_std": 0.9850709045782986,
                        "latency_50": 220.356704,
                        "latency_90": 221.59199759999998,
                        "latency_95": 221.9246008,
                        "latency_99": 223.13476428,
                        "latency_999": 223.77970762799998
                    },
                    "optimized": {
                        "nb_forwards": 123,
                        "throughput": 8.2,
                        "latency_mean": 122.36457313821138,
                        "latency_std": 10.835259832058163,
                        "latency_50": 121.072407,
                        "latency_90": 135.5895634,
                        "latency_95": 135.8899157,
                        "latency_99": 138.01084006,
                        "latency_999": 139.175728762
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 162,
                        "throughput": 10.8,
                        "latency_mean": 92.77228116049382,
                        "latency_std": 9.463275951760943,
                        "latency_50": 89.6388245,
                        "latency_90": 117.9440383,
                        "latency_95": 119.128948,
                        "latency_99": 119.91939433999998,
                        "latency_999": 120.517662241
                    },
                    "optimized": {
                        "nb_forwards": 234,
                        "throughput": 15.6,
                        "latency_mean": 64.20727750854701,
                        "latency_std": 0.31098450167906233,
                        "latency_50": 64.201227,
                        "latency_90": 64.6116797,
                        "latency_95": 64.72937155,
                        "latency_99": 65.065812,
                        "latency_999": 65.162602081
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 37,
                        "throughput": 2.47,
                        "latency_mean": 408.1440117567568,
                        "latency_std": 0.7971971555607033,
                        "latency_50": 408.020052,
                        "latency_90": 409.07254439999997,
                        "latency_95": 409.467417,
                        "latency_99": 410.72945427999997,
                        "latency_999": 411.308799328
                    },
                    "optimized": {
                        "nb_forwards": 64,
                        "throughput": 4.27,
                        "latency_mean": 236.944504875,
                        "latency_std": 20.551884369675392,
                        "latency_50": 227.635955,
                        "latency_90": 272.39387389999996,
                        "latency_95": 272.9781154,
                        "latency_99": 278.51420387999997,
                        "latency_999": 282.636704388
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 248,
                        "throughput": 16.53,
                        "latency_mean": 60.48698351209678,
                        "latency_std": 0.5347970640782516,
                        "latency_50": 60.474966,
                        "latency_90": 61.153837700000004,
                        "latency_95": 61.448808,
                        "latency_99": 61.726770509999994,
                        "latency_999": 61.795131821000005
                    },
                    "optimized": {
                        "nb_forwards": 806,
                        "throughput": 53.73,
                        "latency_mean": 18.618812374689824,
                        "latency_std": 0.9875542199892051,
                        "latency_50": 18.4731305,
                        "latency_90": 20.025972,
                        "latency_95": 20.843049,
                        "latency_99": 21.69578405,
                        "latency_999": 22.082188700000003
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 266,
                        "throughput": 17.73,
                        "latency_mean": 56.56638917669173,
                        "latency_std": 1.1636111458496279,
                        "latency_50": 56.435677,
                        "latency_90": 58.201028,
                        "latency_95": 58.79177375,
                        "latency_99": 59.38910765,
                        "latency_999": 59.7182885
                    },
                    "optimized": {
                        "nb_forwards": 486,
                        "throughput": 32.4,
                        "latency_mean": 30.90172244855967,
                        "latency_std": 1.7590904451420482,
                        "latency_50": 30.4496635,
                        "latency_90": 33.697194,
                        "latency_95": 34.585768,
                        "latency_99": 35.691296949999995,
                        "latency_999": 36.21122576
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 72,
                        "throughput": 4.8,
                        "latency_mean": 209.6628640277778,
                        "latency_std": 13.976074504609217,
                        "latency_50": 214.2633815,
                        "latency_90": 215.4051871,
                        "latency_95": 215.8541272,
                        "latency_99": 218.25931918000003,
                        "latency_999": 221.387059318
                    },
                    "optimized": {
                        "nb_forwards": 141,
                        "throughput": 9.4,
                        "latency_mean": 106.43290490780142,
                        "latency_std": 3.23067147706519,
                        "latency_50": 106.433161,
                        "latency_90": 110.193956,
                        "latency_95": 112.048999,
                        "latency_99": 115.729673,
                        "latency_999": 116.92148872000001
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 144,
                        "throughput": 9.6,
                        "latency_mean": 104.92533156944445,
                        "latency_std": 14.860284902135193,
                        "latency_50": 94.1759405,
                        "latency_90": 121.35962690000001,
                        "latency_95": 121.7929045,
                        "latency_99": 122.82086836,
                        "latency_999": 122.95308226099999
                    },
                    "optimized": {
                        "nb_forwards": 260,
                        "throughput": 17.33,
                        "latency_mean": 57.90458781923077,
                        "latency_std": 3.0707474582498206,
                        "latency_50": 57.1554325,
                        "latency_90": 62.939566,
                        "latency_95": 64.11821884999999,
                        "latency_99": 66.00585256000001,
                        "latency_999": 66.83383903299999
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 212,
                        "throughput": 14.13,
                        "latency_mean": 71.04031975,
                        "latency_std": 1.2335423114384807,
                        "latency_50": 70.953013,
                        "latency_90": 72.93224040000001,
                        "latency_95": 73.2437023,
                        "latency_99": 74.43362789,
                        "latency_999": 75.253044574
                    },
                    "optimized": {
                        "nb_forwards": 396,
                        "throughput": 26.4,
                        "latency_mean": 37.94484309343434,
                        "latency_std": 0.16381138575361315,
                        "latency_50": 37.9182705,
                        "latency_90": 38.176733,
                        "latency_95": 38.23742675,
                        "latency_99": 38.305533200000006,
                        "latency_999": 38.36262916
                    }
                }
            ],
            "others": {
                "baseline": {
                    "exact_match": 82.3,
                    "f1": 87.2318519258519
                },
                "optimized": {
                    "exact_match": 54.5,
                    "f1": 64.29164315407452
                }
            }
        },
        "max_eval_samples": 1000,
        "time_benchmark_args": {
            "duration": 15,
            "warmup_runs": 5
        },
        "model_type": "distilbert"
    },
    {
        "model_name_or_path": "distilbert-base-uncased-distilled-squad",
        "task": "question-answering",
        "task_args": null,
        "dataset": {
            "path": "squad",
            "eval_split": "validation",
            "data_keys": {
                "question": "question",
                "context": "context"
            },
            "ref_keys": [
                "answers"
            ],
            "name": null,
            "calibration_split": "train"
        },
        "quantization_approach": "static",
        "operators_to_quantize": [
            "Add",
            "MatMul"
        ],
        "node_exclusion": [
            "layernorm",
            "gelu",
            "residual",
            "gather",
            "softmax"
        ],
        "aware_training": false,
        "per_channel": false,
        "calibration": {
            "method": "minmax",
            "num_calibration_samples": 100,
            "calibration_histogram_percentile": null,
            "calibration_moving_average": null,
            "calibration_moving_average_constant": null
        },
        "framework": "onnxruntime",
        "framework_args": {
            "opset": 11,
            "optimization_level": 1
        },
        "hardware": "Architecture:                    x86_64\nCPU op-mode(s):                  32-bit, 64-bit\nByte Order:                      Little Endian\nAddress sizes:                   46 bits physical, 48 bits virtual\nCPU(s):                          8\nOn-line CPU(s) list:             0-7\nThread(s) per core:              2\nCore(s) per socket:              4\nSocket(s):                       1\nNUMA node(s):                    1\nVendor ID:                       GenuineIntel\nCPU family:                      6\nModel:                           85\nModel name:                      Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping:                        7\nCPU MHz:                         3099.988\nBogoMIPS:                        4999.99\nHypervisor vendor:               KVM\nVirtualization type:             full\nL1d cache:                       128 KiB\nL1i cache:                       128 KiB\nL2 cache:                        4 MiB\nL3 cache:                        35.8 MiB\nNUMA node0 CPU(s):               0-7\nVulnerability Itlb multihit:     KVM: Vulnerable\nVulnerability L1tf:              Mitigation; PTE Inversion\nVulnerability Mds:               Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown:          Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1:        Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2:        Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds:             Not affected\nVulnerability Tsx async abort:   Not affected\nFlags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
        "versions": {
            "transformers": "4.20.1",
            "optimum": "1.2.3.dev0",
            "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
        },
        "evaluation": {
            "time": [
                {
                    "batch_size": 8,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 37,
                        "throughput": 2.47,
                        "latency_mean": 407.5374461081081,
                        "latency_std": 0.9308811800770912,
                        "latency_50": 407.503129,
                        "latency_90": 408.4859978,
                        "latency_95": 409.1130008,
                        "latency_99": 410.21976224,
                        "latency_999": 410.505260024
                    },
                    "optimized": {
                        "nb_forwards": 67,
                        "throughput": 4.47,
                        "latency_mean": 224.46013014925373,
                        "latency_std": 1.2067813257810087,
                        "latency_50": 224.222622,
                        "latency_90": 225.39195980000002,
                        "latency_95": 226.41629740000002,
                        "latency_99": 228.78541636000003,
                        "latency_999": 230.883278236
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 70,
                        "throughput": 4.67,
                        "latency_mean": 215.47036322857141,
                        "latency_std": 5.763796300712505,
                        "latency_50": 216.127969,
                        "latency_90": 218.3204449,
                        "latency_95": 218.88104615,
                        "latency_99": 220.6371353,
                        "latency_999": 223.19921363
                    },
                    "optimized": {
                        "nb_forwards": 144,
                        "throughput": 9.6,
                        "latency_mean": 104.44696947222222,
                        "latency_std": 0.4207091347287099,
                        "latency_50": 104.4406845,
                        "latency_90": 104.9043417,
                        "latency_95": 105.0561619,
                        "latency_99": 105.80782608999999,
                        "latency_999": 106.49383402599999
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 213,
                        "throughput": 14.2,
                        "latency_mean": 70.45866511737088,
                        "latency_std": 0.2878721089411343,
                        "latency_50": 70.425888,
                        "latency_90": 70.864623,
                        "latency_95": 70.9372132,
                        "latency_99": 71.19328732,
                        "latency_999": 71.41624544400001
                    },
                    "optimized": {
                        "nb_forwards": 603,
                        "throughput": 40.2,
                        "latency_mean": 24.88167712437811,
                        "latency_std": 1.0019642978162417,
                        "latency_50": 24.758782,
                        "latency_90": 26.4062178,
                        "latency_95": 26.7184647,
                        "latency_99": 27.097667720000004,
                        "latency_999": 27.482882742000008
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 255,
                        "throughput": 17.0,
                        "latency_mean": 58.937400823529416,
                        "latency_std": 0.9323535241532246,
                        "latency_50": 58.875322,
                        "latency_90": 59.8057036,
                        "latency_95": 60.399553299999994,
                        "latency_99": 61.85374206,
                        "latency_999": 62.514518306
                    },
                    "optimized": {
                        "nb_forwards": 992,
                        "throughput": 66.13,
                        "latency_mean": 15.12884672983871,
                        "latency_std": 0.9866967082432004,
                        "latency_50": 14.7669305,
                        "latency_90": 16.4293394,
                        "latency_95": 16.48365125,
                        "latency_99": 16.62896306,
                        "latency_999": 16.872082149999997
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 213,
                        "throughput": 14.2,
                        "latency_mean": 70.56589055399061,
                        "latency_std": 0.8249341966205053,
                        "latency_50": 70.62431,
                        "latency_90": 71.616199,
                        "latency_95": 71.9370058,
                        "latency_99": 72.3793054,
                        "latency_999": 72.699766524
                    },
                    "optimized": {
                        "nb_forwards": 544,
                        "throughput": 36.27,
                        "latency_mean": 27.58882950367647,
                        "latency_std": 1.1939137183142827,
                        "latency_50": 27.347617,
                        "latency_90": 29.5275534,
                        "latency_95": 29.9602636,
                        "latency_99": 30.7295052,
                        "latency_999": 30.957031331
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 78,
                        "throughput": 5.2,
                        "latency_mean": 192.63010338461538,
                        "latency_std": 24.23487329273834,
                        "latency_50": 173.7823215,
                        "latency_90": 222.2715464,
                        "latency_95": 223.3036971,
                        "latency_99": 225.55757293000002,
                        "latency_999": 226.406006593
                    },
                    "optimized": {
                        "nb_forwards": 132,
                        "throughput": 8.8,
                        "latency_mean": 113.81753161363636,
                        "latency_std": 0.6467249477070305,
                        "latency_50": 113.767756,
                        "latency_90": 114.6572987,
                        "latency_95": 114.88671805,
                        "latency_99": 115.46696548,
                        "latency_999": 115.9667929
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 169,
                        "throughput": 11.27,
                        "latency_mean": 89.13836758579882,
                        "latency_std": 1.8355160566154043,
                        "latency_50": 89.192802,
                        "latency_90": 91.4133886,
                        "latency_95": 92.51820620000001,
                        "latency_99": 93.29504212,
                        "latency_999": 93.695905456
                    },
                    "optimized": {
                        "nb_forwards": 288,
                        "throughput": 19.2,
                        "latency_mean": 52.089132309027775,
                        "latency_std": 0.19887005817696832,
                        "latency_50": 52.075364,
                        "latency_90": 52.3625353,
                        "latency_95": 52.4649237,
                        "latency_99": 52.58880765,
                        "latency_999": 52.706969511
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 428,
                        "throughput": 28.53,
                        "latency_mean": 35.0650053457944,
                        "latency_std": 1.9416905978175623,
                        "latency_50": 34.4812805,
                        "latency_90": 38.0941055,
                        "latency_95": 38.495843199999996,
                        "latency_99": 39.51390012,
                        "latency_999": 42.14187876599995
                    },
                    "optimized": {
                        "nb_forwards": 1628,
                        "throughput": 108.53,
                        "latency_mean": 9.214644947174447,
                        "latency_std": 0.0940311186126701,
                        "latency_50": 9.205694,
                        "latency_90": 9.3333551,
                        "latency_95": 9.36865875,
                        "latency_99": 9.4546762,
                        "latency_999": 9.629165349000006
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 125,
                        "throughput": 8.33,
                        "latency_mean": 120.566423736,
                        "latency_std": 0.32773176890043826,
                        "latency_50": 120.535469,
                        "latency_90": 121.00340340000001,
                        "latency_95": 121.16425579999999,
                        "latency_99": 121.27308992,
                        "latency_999": 121.643879456
                    },
                    "optimized": {
                        "nb_forwards": 277,
                        "throughput": 18.47,
                        "latency_mean": 54.24849066787004,
                        "latency_std": 0.19047805788515895,
                        "latency_50": 54.224382,
                        "latency_90": 54.5044332,
                        "latency_95": 54.6109856,
                        "latency_99": 54.75149856,
                        "latency_999": 54.862251523999994
                    }
                }
            ],
            "others": {
                "baseline": {
                    "exact_match": 82.3,
                    "f1": 87.2318519258519
                },
                "optimized": {
                    "exact_match": 72.9,
                    "f1": 79.96371998744281
                }
            }
        },
        "max_eval_samples": 1000,
        "time_benchmark_args": {
            "duration": 15,
            "warmup_runs": 5
        },
        "model_type": "distilbert"
    },
    {
        "model_name_or_path": "distilbert-base-uncased-distilled-squad",
        "task": "question-answering",
        "task_args": null,
        "dataset": {
            "path": "squad",
            "eval_split": "validation",
            "data_keys": {
                "question": "question",
                "context": "context"
            },
            "ref_keys": [
                "answers"
            ],
            "name": null,
            "calibration_split": "train"
        },
        "quantization_approach": "dynamic",
        "operators_to_quantize": [
            "Add"
        ],
        "node_exclusion": [
            "layernorm",
            "gelu",
            "residual",
            "gather",
            "softmax"
        ],
        "aware_training": false,
        "per_channel": false,
        "calibration": {
            "method": "minmax",
            "num_calibration_samples": 100,
            "calibration_histogram_percentile": null,
            "calibration_moving_average": null,
            "calibration_moving_average_constant": null
        },
        "framework": "onnxruntime",
        "framework_args": {
            "opset": 11,
            "optimization_level": 1
        },
        "hardware": "Architecture:                    x86_64\nCPU op-mode(s):                  32-bit, 64-bit\nByte Order:                      Little Endian\nAddress sizes:                   46 bits physical, 48 bits virtual\nCPU(s):                          8\nOn-line CPU(s) list:             0-7\nThread(s) per core:              2\nCore(s) per socket:              4\nSocket(s):                       1\nNUMA node(s):                    1\nVendor ID:                       GenuineIntel\nCPU family:                      6\nModel:                           85\nModel name:                      Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping:                        7\nCPU MHz:                         3099.997\nBogoMIPS:                        4999.99\nHypervisor vendor:               KVM\nVirtualization type:             full\nL1d cache:                       128 KiB\nL1i cache:                       128 KiB\nL2 cache:                        4 MiB\nL3 cache:                        35.8 MiB\nNUMA node0 CPU(s):               0-7\nVulnerability Itlb multihit:     KVM: Vulnerable\nVulnerability L1tf:              Mitigation; PTE Inversion\nVulnerability Mds:               Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown:          Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1:        Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2:        Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds:             Not affected\nVulnerability Tsx async abort:   Not affected\nFlags:                           fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
        "versions": {
            "transformers": "4.20.1",
            "optimum": "1.2.3.dev0",
            "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
        },
        "evaluation": {
            "time": [
                {
                    "batch_size": 8,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 70,
                        "throughput": 4.67,
                        "latency_mean": 216.4796847285714,
                        "latency_std": 0.5319523840331125,
                        "latency_50": 216.4132115,
                        "latency_90": 217.2235808,
                        "latency_95": 217.43512135,
                        "latency_99": 217.93726658,
                        "latency_999": 218.004904658
                    },
                    "optimized": {
                        "nb_forwards": 93,
                        "throughput": 6.2,
                        "latency_mean": 162.63612432258063,
                        "latency_std": 6.7981229484609695,
                        "latency_50": 160.371024,
                        "latency_90": 175.9109966,
                        "latency_95": 178.2932724,
                        "latency_99": 180.45402764,
                        "latency_999": 181.166818064
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 250,
                        "throughput": 16.67,
                        "latency_mean": 60.13216628,
                        "latency_std": 1.0214734052265138,
                        "latency_50": 60.110321,
                        "latency_90": 61.3823292,
                        "latency_95": 61.8370275,
                        "latency_99": 62.49582684,
                        "latency_999": 62.834720483999995
                    },
                    "optimized": {
                        "nb_forwards": 506,
                        "throughput": 33.73,
                        "latency_mean": 29.64927941897233,
                        "latency_std": 0.09914349596696566,
                        "latency_50": 29.640438,
                        "latency_90": 29.7649045,
                        "latency_95": 29.817656,
                        "latency_99": 29.927720100000002,
                        "latency_999": 30.19670687
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 64,
                    "baseline": {
                        "nb_forwards": 125,
                        "throughput": 8.33,
                        "latency_mean": 120.849639712,
                        "latency_std": 0.9668375634433722,
                        "latency_50": 120.813037,
                        "latency_90": 121.9735066,
                        "latency_95": 122.5098556,
                        "latency_99": 122.86986592,
                        "latency_999": 123.22419958
                    },
                    "optimized": {
                        "nb_forwards": 141,
                        "throughput": 9.4,
                        "latency_mean": 106.49951225531915,
                        "latency_std": 0.31470398129243726,
                        "latency_50": 106.442906,
                        "latency_90": 106.932199,
                        "latency_95": 107.086644,
                        "latency_99": 107.3766202,
                        "latency_999": 107.86804946000001
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 343,
                        "throughput": 22.87,
                        "latency_mean": 43.83403140233236,
                        "latency_std": 6.777393138738352,
                        "latency_50": 47.974253,
                        "latency_90": 49.6771042,
                        "latency_95": 50.217503,
                        "latency_99": 50.915752839999996,
                        "latency_999": 52.175437058000014
                    },
                    "optimized": {
                        "nb_forwards": 875,
                        "throughput": 58.33,
                        "latency_mean": 17.15841726057143,
                        "latency_std": 0.08566829655094844,
                        "latency_50": 17.153516,
                        "latency_90": 17.2748228,
                        "latency_95": 17.3054081,
                        "latency_99": 17.36630658,
                        "latency_999": 17.49840138
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 128,
                        "throughput": 8.53,
                        "latency_mean": 117.3791356640625,
                        "latency_std": 0.8413632402417973,
                        "latency_50": 117.250864,
                        "latency_90": 118.50406629999999,
                        "latency_95": 118.87116745,
                        "latency_99": 119.4692367,
                        "latency_999": 119.927835012
                    },
                    "optimized": {
                        "nb_forwards": 144,
                        "throughput": 9.6,
                        "latency_mean": 104.27652415277778,
                        "latency_std": 0.3048652909436058,
                        "latency_50": 104.223559,
                        "latency_90": 104.6723871,
                        "latency_95": 104.8562854,
                        "latency_99": 105.23841804999999,
                        "latency_999": 105.67265316
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 68,
                        "throughput": 4.53,
                        "latency_mean": 221.2224431470588,
                        "latency_std": 1.118456788421114,
                        "latency_50": 221.1416945,
                        "latency_90": 222.8013713,
                        "latency_95": 223.27910730000002,
                        "latency_99": 223.56076078,
                        "latency_999": 223.828592878
                    },
                    "optimized": {
                        "nb_forwards": 68,
                        "throughput": 4.53,
                        "latency_mean": 221.36998825,
                        "latency_std": 0.4904801834733391,
                        "latency_50": 221.3431115,
                        "latency_90": 221.8755237,
                        "latency_95": 221.9769379,
                        "latency_99": 222.760048,
                        "latency_999": 223.048282
                    }
                },
                {
                    "batch_size": 4,
                    "input_length": 32,
                    "baseline": {
                        "nb_forwards": 209,
                        "throughput": 13.93,
                        "latency_mean": 72.04991935406699,
                        "latency_std": 0.9649173207131073,
                        "latency_50": 72.140642,
                        "latency_90": 73.1535642,
                        "latency_95": 73.46569579999999,
                        "latency_99": 74.7303538,
                        "latency_999": 75.40225632
                    },
                    "optimized": {
                        "nb_forwards": 280,
                        "throughput": 18.67,
                        "latency_mean": 53.682652257142855,
                        "latency_std": 0.2593595953484514,
                        "latency_50": 53.628275,
                        "latency_90": 53.8589829,
                        "latency_95": 54.04694715,
                        "latency_99": 55.05985277,
                        "latency_999": 55.324252431000005
                    }
                },
                {
                    "batch_size": 1,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 235,
                        "throughput": 15.67,
                        "latency_mean": 64.03706125957447,
                        "latency_std": 7.7729420260039,
                        "latency_50": 58.940644,
                        "latency_90": 73.7838802,
                        "latency_95": 74.4361305,
                        "latency_99": 75.54929478,
                        "latency_999": 76.00147653
                    },
                    "optimized": {
                        "nb_forwards": 300,
                        "throughput": 20.0,
                        "latency_mean": 50.13742374666667,
                        "latency_std": 4.800538669035651,
                        "latency_50": 47.3348575,
                        "latency_90": 57.0711271,
                        "latency_95": 57.1360735,
                        "latency_99": 57.42901637,
                        "latency_999": 57.585291638
                    }
                },
                {
                    "batch_size": 8,
                    "input_length": 128,
                    "baseline": {
                        "nb_forwards": 37,
                        "throughput": 2.47,
                        "latency_mean": 406.5056369189189,
                        "latency_std": 0.7541595003868546,
                        "latency_50": 406.359168,
                        "latency_90": 407.44441,
                        "latency_95": 408.065172,
                        "latency_99": 408.18207568,
                        "latency_999": 408.195881968
                    },
                    "optimized": {
                        "nb_forwards": 39,
                        "throughput": 2.6,
                        "latency_mean": 394.84622148717943,
                        "latency_std": 54.09154075921731,
                        "latency_50": 441.007729,
                        "latency_90": 446.218295,
                        "latency_95": 447.0315637,
                        "latency_99": 447.1409742,
                        "latency_999": 447.15734232
                    }
                }
            ],
            "others": {
                "baseline": {
                    "exact_match": 82.3,
                    "f1": 87.2318519258519
                },
                "optimized": {
                    "exact_match": 82.3,
                    "f1": 87.2318519258519
                }
            }
        },
        "max_eval_samples": 1000,
        "time_benchmark_args": {
            "duration": 15,
            "warmup_runs": 5
        },
        "model_type": "distilbert"
    }
]