File size: 5,334 Bytes
d1e3b68
 
 
 
 
 
7f9a235
7724866
7f9a235
 
d1cb523
7f9a235
 
 
 
 
d1e3b68
d1cb523
7f9a235
db435b4
d1e3b68
 
 
 
d1cb523
d1e3b68
db435b4
 
 
 
 
d1e3b68
 
 
 
b71e276
d1e3b68
 
 
 
 
b71e276
d1e3b68
 
 
 
db435b4
b71e276
d1e3b68
7f9a235
d1e3b68
 
b71e276
d1e3b68
7f9a235
d1e3b68
 
7f9a235
 
 
 
 
 
b71e276
7f9a235
 
 
 
 
 
 
 
 
 
 
d1cb523
 
7f9a235
 
 
 
 
 
 
 
 
 
 
 
 
 
b71e276
7f9a235
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b71e276
 
 
 
 
 
7f9a235
db435b4
 
 
7f9a235
 
 
d1e3b68
7f9a235
b71e276
d1e3b68
 
 
 
 
7f9a235
 
 
 
 
 
d1e3b68
db435b4
d1e3b68
 
 
7f9a235
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import random
import gradio as gr
from optimum_benchmark.task_utils import (
    TASKS_TO_AUTOMODELS,
    infer_task_from_model_name_or_path,
)
from run import run_benchmark
from config_store import (
    get_training_config,
    get_inference_config,
    get_text_generation_inference_config,
    get_neural_compressor_config,
    get_onnxruntime_config,
    get_openvino_config,
    get_pytorch_config,
)

# Backends offered in the "backend" dropdown. The order matters: the backend
# config accordions are shown/hidden positionally against this list.
BACKENDS = [
    "pytorch",
    "onnxruntime",
    "openvino",
    "neural-compressor",
    "text-generation-inference",
]
# Benchmark types offered in the "benchmark" dropdown; same positional pairing
# with the benchmark config accordions.
BENCHMARKS = [
    "inference",
    "training",
]
# Devices offered in the "device" dropdown.
DEVICES = [
    "cpu",
    "cuda",
]


# Build the whole UI: global experiment settings, per-backend and per-benchmark
# config panels (only the selected one is visible), and the run button wired to
# `run_benchmark`.
with gr.Blocks() as demo:
    # title text
    gr.HTML("<h1 style='text-align: center'>🤗 Optimum-Benchmark UI 🏋️</h1>")
    # explanation text
    gr.Markdown(
        "This is a demo space of [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark.git):"
        "<br>A unified multi-backend utility for benchmarking `transformers`, `diffusers`, `peft` and `timm` models with "
        "Optimum's optimizations & quantization, for inference & training, on different backends & hardwares."
    )

    # ---- global experiment settings ----
    model = gr.Textbox(
        label="model",
        value="bert-base-uncased",
        info="Model to run the benchmark on. In the particular case of this space, only models that are hosted on huggingface.co/models can be benchmarked.",
    )
    task = gr.Dropdown(
        label="task",
        value="text-classification",
        choices=list(TASKS_TO_AUTOMODELS.keys()),
        info="Task to run the benchmark on. Can be inferred automatically by submitting a model.",
    )
    device = gr.Dropdown(
        value="cpu",
        label="device",
        choices=DEVICES,
        info="Device to run the benchmark on. make sure to duplicate the space if you wanna run on CUDA devices.",
    )
    experiment = gr.Textbox(
        label="experiment_name",
        # A callable default is re-evaluated by Gradio each time the page loads,
        # so every visitor gets a fresh random name. A plain f-string here would
        # be computed once at startup and shared by all sessions, making their
        # results folders collide.
        value=lambda: f"experiment_{random.getrandbits(16)}",
        info="Name of the experiment. Will be used to create a folder where results are stored.",
    )
    # pressing Enter in the model box fills in the task dropdown
    model.submit(fn=infer_task_from_model_name_or_path, inputs=model, outputs=task)

    with gr.Row():
        with gr.Column():
            with gr.Row():
                backend = gr.Dropdown(
                    label="backend",
                    choices=BACKENDS,
                    value=BACKENDS[0],
                    info="Backend to run the benchmark on.",
                )

            # One accordion per backend, declared in the same order as BACKENDS;
            # only the first (the default backend) starts visible.
            with gr.Row() as backend_configs:
                with gr.Accordion(label="Pytorch Config", open=False, visible=True):
                    pytorch_config = get_pytorch_config()
                with gr.Accordion(label="OnnxRunTime Config", open=False, visible=False):
                    onnxruntime_config = get_onnxruntime_config()
                with gr.Accordion(label="OpenVINO Config", open=False, visible=False):
                    openvino_config = get_openvino_config()
                with gr.Accordion(label="Neural Compressor Config", open=False, visible=False):
                    neural_compressor_config = get_neural_compressor_config()
                with gr.Accordion(label="Text Generation Inference Config", open=False, visible=False):
                    text_generation_inference_config = get_text_generation_inference_config()

        # hide backend configs based on backend
        # (updates are matched to accordions by position, hence the ordering
        # requirement between BACKENDS and the accordions above)
        backend.change(
            inputs=backend,
            outputs=backend_configs.children,
            fn=lambda value: [gr.update(visible=value == key) for key in BACKENDS],
        )

        with gr.Column():
            with gr.Row():
                benchmark = gr.Dropdown(
                    label="benchmark",
                    choices=BENCHMARKS,
                    value=BENCHMARKS[0],
                    info="Type of benchmark to run.",
                )

            # One accordion per benchmark type, declared in the same order as BENCHMARKS.
            with gr.Row() as benchmark_configs:
                with gr.Accordion(label="Inference Config", open=False, visible=True):
                    inference_config = get_inference_config()
                with gr.Accordion(label="Training Config", open=False, visible=False):
                    training_config = get_training_config()

        # hide benchmark configs based on benchmark (same positional matching as above)
        benchmark.change(
            inputs=benchmark,
            outputs=benchmark_configs.children,
            fn=lambda value: [gr.update(visible=value == key) for key in BENCHMARKS],
        )

    baseline = gr.Checkbox(
        value=False,
        label="Compare to Baseline",
        info="If checked, will run two experiments: one with the given configuration, and another with a baseline pytorch configuration.",
    )

    button = gr.Button(value="Run Benchmark", variant="primary")
    # output area: an HTML pane plus a dataframe that starts hidden
    with gr.Accordion(label="", open=True):
        html_output = gr.HTML()
        table_output = gr.Dataframe(visible=False)

    button.click(
        fn=run_benchmark,
        # `inputs` is deliberately a set: Gradio then calls the function with a
        # single dict mapping each component to its current value.
        # NOTE(review): the components of `text_generation_inference_config` are
        # created above but not passed here - confirm whether that is intended.
        inputs={
            experiment,
            baseline,
            model,
            task,
            device,
            backend,
            benchmark,
            *pytorch_config,
            *openvino_config,
            *onnxruntime_config,
            *neural_compressor_config,
            *inference_config,
            *training_config,
        },
        # the button is also an output, so run_benchmark can update it
        outputs=[html_output, button, table_output],
        queue=True,
    )

# Turn on Gradio's request queue, then start serving the app.
_queued_demo = demo.queue()
_queued_demo.launch()