# Hugging Face Space: optimum/auto-benchmark (status: Running) - file size: 12,456 bytes

import gradio as gr


def get_base_backend_config(backend_name="pytorch"):
    """Build the Gradio inputs for the options every backend shares.

    Args:
        backend_name: Prefix put in front of each option name in the
            component labels, e.g. ``"pytorch"`` gives ``"pytorch.seed"``.

    Returns:
        A list of six components, in this order: seed,
        inter_op_num_threads, intra_op_num_threads,
        initial_isolation_check, continous_isolation_check, delete_cache.
    """
    # Components are created in exactly the order they are returned.
    seed = gr.Textbox(
        label=f"{backend_name}.seed",
        info="Sets seed for reproducibility",
        value=42,
    )
    inter_op_num_threads = gr.Textbox(
        label=f"{backend_name}.inter_op_num_threads",
        info="Use null for default and -1 for cpu_count()",
        value="null",
    )
    intra_op_num_threads = gr.Textbox(
        label=f"{backend_name}.intra_op_num_threads",
        info="Use null for default and -1 for cpu_count()",
        value="null",
    )
    initial_isolation_check = gr.Checkbox(
        label=f"{backend_name}.initial_isolation_check",
        info="Makes sure that initially, no other process is running on the target device",
        value=True,
    )
    # NOTE(review): the "continous" spelling is part of the runtime label and
    # is kept as-is; presumably it matches the option name expected by the
    # code consuming these labels - confirm before renaming.
    continous_isolation_check = gr.Checkbox(
        label=f"{backend_name}.continous_isolation_check",
        info="Makes sure that throughout the benchmark, no other process is running on the target device",
        value=True,
    )
    delete_cache = gr.Checkbox(
        label=f"{backend_name}.delete_cache",
        info="Deletes model cache (weights & configs) after benchmark is done",
        value=False,
    )

    return [
        seed,
        inter_op_num_threads,
        intra_op_num_threads,
        initial_isolation_check,
        continous_isolation_check,
        delete_cache,
    ]


def get_pytorch_config():
    """Build the Gradio inputs for the PyTorch backend.

    Returns:
        The shared backend components (labelled with the ``pytorch.``
        prefix) followed by the PyTorch-specific ones, in this order:
        no_weights, torch_dtype, amp_autocast, amp_dtype, torch_compile,
        bettertransformer, quantization_scheme, peft_strategy.
    """
    return get_base_backend_config(backend_name="pytorch") + [
        # no_weights
        gr.Checkbox(
            value=False,
            label="pytorch.no_weights",
            info="Generates random weights instead of downloading pretrained ones",
        ),
        # device_map is currently disabled:
        # gr.Dropdown(
        #     value="null",
        #     label="pytorch.device_map",
        #     choices=["null", "auto", "sequential"],
        #     info="Use null for default and `auto` or `sequential` the same way as in `from_pretrained`",
        # ),
        # torch_dtype
        gr.Dropdown(
            value="null",
            label="pytorch.torch_dtype",
            choices=["null", "bfloat16", "float16", "float32", "auto"],
            info="Use null for default and `auto` for automatic dtype selection",
        ),
        # amp_autocast
        gr.Checkbox(
            value=False,
            label="pytorch.amp_autocast",
            info="Enables Pytorch's native Automatic Mixed Precision",
        ),
        # amp_dtype
        gr.Dropdown(
            value="null",
            label="pytorch.amp_dtype",
            info="Use null for default",
            choices=["null", "bfloat16", "float16"],
        ),
        # torch_compile
        gr.Checkbox(
            value=False,
            label="pytorch.torch_compile",
            info="Compiles the model with torch.compile",
        ),
        # bettertransformer (help text typo "anf" corrected to "and")
        gr.Checkbox(
            value=False,
            label="pytorch.bettertransformer",
            info="Applies optimum.BetterTransformer for fastpath and optimized attention",
        ),
        # quantization_scheme
        gr.Dropdown(
            value="null",
            choices=["null", "gptq", "bnb"],
            label="pytorch.quantization_scheme",
            info="Use null for no quantization",
        ),
        # use_ddp is currently disabled:
        # gr.Checkbox(
        #     value=False,
        #     label="pytorch.use_ddp",
        #     info="Uses DistributedDataParallel for multi-gpu training",
        # ),
        # peft_strategy
        gr.Dropdown(
            value="null",
            choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"],
            label="pytorch.peft_strategy",
            info="Use null for no PEFT",
        ),
    ]


def get_onnxruntime_config():
    """Build the Gradio inputs for the ONNX Runtime backend.

    Every ``*_config`` table follows the same layout: the headers hold the
    option names and the single static row holds the matching values.

    Returns:
        The shared backend components (labelled with the ``onnxruntime.``
        prefix) followed by the ONNX Runtime-specific ones, in this order:
        no_weights, export, use_cache, use_merged, torch_dtype,
        use_io_binding, auto_optimization, auto_quantization, optimization,
        optimization_config, quantization, quantization_config, calibration,
        calibration_config, peft_strategy.
    """
    return get_base_backend_config(backend_name="onnxruntime") + [
        # no_weights (label previously carried the "pytorch." prefix by
        # mistake, colliding with the PyTorch backend's option)
        gr.Checkbox(
            value=False,
            label="onnxruntime.no_weights",
            info="Generates random weights instead of downloading pretrained ones",
        ),
        # export
        gr.Checkbox(
            value=True,
            label="onnxruntime.export",
            info="Exports the model to ONNX",
        ),
        # use_cache
        gr.Checkbox(
            value=True,
            label="onnxruntime.use_cache",
            info="Uses cached ONNX model if available",
        ),
        # use_merged
        gr.Checkbox(
            value=False,
            label="onnxruntime.use_merged",
            info="Uses merged ONNX model if available",
        ),
        # torch_dtype
        gr.Dropdown(
            value="null",
            label="onnxruntime.torch_dtype",
            choices=["null", "bfloat16", "float16", "float32", "auto"],
            info="Use null for default and `auto` for automatic dtype selection",
        ),
        # use_io_binding
        gr.Checkbox(
            value=True,
            label="onnxruntime.use_io_binding",
            info="Uses IO binding for inference",
        ),
        # auto_optimization
        gr.Dropdown(
            value="null",
            label="onnxruntime.auto_optimization",
            choices=["null", "O1", "O2", "O3", "O4"],
            info="Use null for default",
        ),
        # auto_quantization
        gr.Dropdown(
            value="null",
            label="onnxruntime.auto_quantization",
            choices=["null", "arm64", "avx2", "avx512", "avx512_vnni", "tensorrt"],
            info="Use null for default",
        ),
        # optimization
        gr.Checkbox(
            value=False,
            label="onnxruntime.optimization",
            info="Enables manual optimization",
        ),
        # optimization_config: option name in the header, its value in the
        # row (the two were swapped before); same layout as
        # calibration_config below.
        gr.Dataframe(
            type="array",
            headers=["optimization_level"],
            value=[["1"]],
            row_count=(1, "static"),
            col_count=(1, "dynamic"),
            label="onnxruntime.optimization_config",
        ),
        # quantization
        gr.Checkbox(
            value=False,
            label="onnxruntime.quantization",
            info="Enables manual quantization",
        ),
        # quantization_config: option name in the header, its value in the
        # row (the two were swapped before, leaving a boolean as header).
        gr.Dataframe(
            type="array",
            headers=["is_static"],
            value=[[False]],
            row_count=(1, "static"),
            col_count=(1, "dynamic"),
            label="onnxruntime.quantization_config",
            info="Use null for default",
        ),
        # calibration
        gr.Checkbox(
            value=False,
            label="onnxruntime.calibration",
            info="Enables calibration",
        ),
        # calibration_config
        gr.Dataframe(
            type="array",
            value=[["glue"]],
            headers=["dataset_name"],
            row_count=(1, "static"),
            col_count=(1, "dynamic"),
            label="onnxruntime.calibration_config",
            info="Use null for default",
        ),
        # peft_strategy
        gr.Dropdown(
            value="null",
            label="onnxruntime.peft_strategy",
            choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"],
            info="Use null for full parameters fine-tuning",
        ),
    ]


def get_openvino_config():
    """Build the Gradio inputs for the OpenVINO backend.

    Returns:
        The shared backend components (labelled with the ``openvino.``
        prefix) followed by the OpenVINO-specific ones, in this order:
        export, use_cache, use_merged, reshape, half, quantization,
        quantization_config, calibration, calibration_config.
    """
    shared_options = get_base_backend_config(backend_name="openvino")

    # Components are created in exactly the order they are returned.
    export = gr.Checkbox(
        label="openvino.export",
        info="Exports the model to ONNX",
        value=True,
    )
    use_cache = gr.Checkbox(
        label="openvino.use_cache",
        info="Uses cached ONNX model if available",
        value=True,
    )
    use_merged = gr.Checkbox(
        label="openvino.use_merged",
        info="Uses merged ONNX model if available",
        value=False,
    )
    reshape = gr.Checkbox(
        label="openvino.reshape",
        info="Reshapes the model to the input shape",
        value=False,
    )
    half = gr.Checkbox(
        label="openvino.half",
        info="Converts model to half precision",
        value=False,
    )
    quantization = gr.Checkbox(
        label="openvino.quantization",
        info="Enables quantization",
        value=False,
    )
    # One fixed row; the column count starts at three and is user-adjustable.
    quantization_config = gr.Dataframe(
        label="openvino.quantization_config",
        type="array",
        headers=["compression", "input_info", "save_onnx_model"],
        value=[[None, None, None]],
        row_count=(1, "static"),
        col_count=(3, "dynamic"),
    )
    calibration = gr.Checkbox(
        label="openvino.calibration",
        info="Enables calibration",
        value=False,
    )
    calibration_config = gr.Dataframe(
        label="openvino.calibration_config",
        type="array",
        headers=["dataset_name"],
        value=[["glue"]],
        row_count=(1, "static"),
        col_count=(1, "dynamic"),
    )

    openvino_options = [
        export,
        use_cache,
        use_merged,
        reshape,
        half,
        quantization,
        quantization_config,
        calibration,
        calibration_config,
    ]
    return shared_options + openvino_options


def get_neural_compressor_config():
    """Build the Gradio inputs for the Neural Compressor backend.

    Returns:
        The shared backend components (labelled with the
        ``neural-compressor.`` prefix) followed by, in order:
        ptq_quantization, ptq_quantization_config, calibration,
        calibration_config.
    """
    shared_options = get_base_backend_config(backend_name="neural-compressor")

    # Components are created in exactly the order they are returned.
    ptq_quantization = gr.Checkbox(
        label="neural-compressor.ptq_quantization",
        info="Enables post-training quantization",
        value=False,
    )
    ptq_quantization_config = gr.Dataframe(
        label="neural-compressor.ptq_quantization_config",
        type="array",
        headers=["device"],
        value=[["cpu"]],
        row_count=(1, "static"),
        col_count=(1, "dynamic"),
    )
    calibration = gr.Checkbox(
        label="neural-compressor.calibration",
        info="Enables calibration",
        value=False,
    )
    calibration_config = gr.Dataframe(
        label="neural-compressor.calibration_config",
        type="array",
        headers=["dataset_name"],
        value=[["glue"]],
        row_count=(1, "static"),
        col_count=(1, "dynamic"),
    )

    neural_compressor_options = [
        ptq_quantization,
        ptq_quantization_config,
        calibration,
        calibration_config,
    ]
    return shared_options + neural_compressor_options


def get_inference_config():
    """Build the Gradio inputs for the inference benchmark.

    Returns:
        A list of six components, in this order: duration, warmup_runs,
        memory, energy, input_shapes, forward_kwargs.
    """
    # Components are created in exactly the order they are returned.
    duration = gr.Textbox(
        label="inference.duration",
        info="Minimum duration of benchmark in seconds",
        value=10,
    )
    warmup_runs = gr.Textbox(
        label="inference.warmup_runs",
        info="Number of warmup runs before measurements",
        value=10,
    )
    memory = gr.Checkbox(
        label="inference.memory",
        info="Measures the peak memory footprint",
        value=False,
    )
    energy = gr.Checkbox(
        label="inference.energy",
        info="Measures energy consumption and carbon emissions",
        value=False,
    )
    # Single fixed row; columns can be added by the user for extra inputs.
    input_shapes = gr.Dataframe(
        label="inference.input_shapes",
        info="Controllable input shapes, add more columns for more inputs",
        type="array",
        headers=["batch_size", "sequence_length"],
        value=[[2, 16]],
        row_count=(1, "static"),
        col_count=(2, "dynamic"),
    )
    forward_kwargs = gr.Dataframe(
        label="inference.forward_kwargs",
        info="Keyword arguments for the forward pass, add more columns for more arguments",
        type="array",
        headers=["return_dict"],
        value=[[False]],
        row_count=(1, "static"),
        col_count=(1, "dynamic"),
    )

    return [duration, warmup_runs, memory, energy, input_shapes, forward_kwargs]


def get_training_config():
    """Build the Gradio inputs for the training benchmark.

    Returns:
        A list of three components, in this order: warmup_steps,
        dataset_shapes, training_arguments.
    """
    # Components are created in exactly the order they are returned.
    warmup_steps = gr.Textbox(
        label="training.warmup_steps",
        value=40,
    )
    dataset_shapes = gr.Dataframe(
        label="training.dataset_shapes",
        type="array",
        headers=["dataset_size", "sequence_length"],
        value=[[500, 16]],
        row_count=(1, "static"),
        col_count=(2, "dynamic"),
    )
    training_arguments = gr.Dataframe(
        label="training.training_arguments",
        type="array",
        headers=["per_device_train_batch_size"],
        value=[[2]],
        row_count=(1, "static"),
        col_count=(1, "dynamic"),
    )

    return [warmup_steps, dataset_shapes, training_arguments]