"""Gradio demo space for Optimum-Benchmark.

Builds a UI that lets users pick a model, task, device, backend and benchmark
type, then dispatches the selected component values to ``run_benchmark``.
"""
import os
import random

import gradio as gr
from optimum_benchmark.task_utils import (
    TASKS_TO_AUTOMODELS,
    infer_task_from_model_name_or_path,
)

from run import run_benchmark
from config_store import (
    get_training_config,
    get_inference_config,
    get_text_generation_inference_config,
    get_neural_compressor_config,
    get_onnxruntime_config,
    get_openvino_config,
    get_pytorch_config,
)

# NOTE(review): downloading and executing a remote install script at import time
# is arbitrary remote code execution — acceptable only inside a trusted,
# sandboxed demo space. Flagged, behavior kept as-is.
os.system("curl -fsSL https://get.docker.com -o get-docker.sh")
os.system("sh get-docker.sh")

# The accordions under `backend_configs` below are created in this exact order:
# the backend.change callback maps visibility updates positionally onto
# backend_configs.children, so BACKENDS order and accordion order must match.
BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor", "text-generation-inference"]
BENCHMARKS = ["inference", "training"]
DEVICES = ["cpu", "cuda"]

with gr.Blocks() as demo:
    # title text (markup reconstructed — the original HTML string was garbled by extraction)
    gr.HTML("<h1><center>🤗 Optimum-Benchmark UI 🏋️</center></h1>")

    # explanation text
    gr.Markdown(
        "This is a demo space of [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark.git):"
        "<br>"
        "A unified multi-backend utility for benchmarking `transformers`, `diffusers`, `peft` and `timm` models with "
        "Optimum's optimizations & quantization, for inference & training, on different backends & hardwares."
    )

    model = gr.Textbox(
        label="model",
        value="bert-base-uncased",
        info="Model to run the benchmark on. In the particular case of this space, only models that are hosted on huggingface.co/models can be benchmarked.",
    )
    task = gr.Dropdown(
        label="task",
        value="text-classification",
        choices=list(TASKS_TO_AUTOMODELS.keys()),
        info="Task to run the benchmark on. Can be inferred automatically by submitting a model.",
    )
    device = gr.Dropdown(
        value="cpu",
        label="device",
        choices=DEVICES,
        info="Device to run the benchmark on. make sure to duplicate the space if you wanna run on CUDA devices.",
    )
    experiment = gr.Textbox(
        label="experiment_name",
        # random suffix so parallel runs get distinct result folders
        value=f"experiment_{random.getrandbits(16)}",
        info="Name of the experiment. Will be used to create a folder where results are stored.",
    )

    # infer the task automatically when a model id is submitted
    model.submit(fn=infer_task_from_model_name_or_path, inputs=model, outputs=task)

    with gr.Row():
        with gr.Column():
            with gr.Row():
                backend = gr.Dropdown(
                    label="backend",
                    choices=BACKENDS,
                    value=BACKENDS[0],
                    info="Backend to run the benchmark on.",
                )

            with gr.Row() as backend_configs:
                # order must match BACKENDS (see note above)
                with gr.Accordion(label="Pytorch Config", open=False, visible=True):
                    pytorch_config = get_pytorch_config()
                with gr.Accordion(label="OnnxRunTime Config", open=False, visible=False):
                    onnxruntime_config = get_onnxruntime_config()
                with gr.Accordion(label="OpenVINO Config", open=False, visible=False):
                    openvino_config = get_openvino_config()
                with gr.Accordion(label="Neural Compressor Config", open=False, visible=False):
                    neural_compressor_config = get_neural_compressor_config()
                with gr.Accordion(label="Text Generation Inference Config", open=False, visible=False):
                    text_generation_inference_config = get_text_generation_inference_config()

            # show only the accordion matching the selected backend
            backend.change(
                inputs=backend,
                outputs=backend_configs.children,
                fn=lambda value: [gr.update(visible=value == key) for key in BACKENDS],
            )

        with gr.Column():
            with gr.Row():
                benchmark = gr.Dropdown(
                    label="benchmark",
                    choices=BENCHMARKS,
                    value=BENCHMARKS[0],
                    info="Type of benchmark to run.",
                )

            with gr.Row() as benchmark_configs:
                # order must match BENCHMARKS (same positional-visibility scheme)
                with gr.Accordion(label="Inference Config", open=False, visible=True):
                    inference_config = get_inference_config()
                with gr.Accordion(label="Training Config", open=False, visible=False):
                    training_config = get_training_config()

            # show only the accordion matching the selected benchmark
            benchmark.change(
                inputs=benchmark,
                outputs=benchmark_configs.children,
                fn=lambda value: [gr.update(visible=value == key) for key in BENCHMARKS],
            )

    baseline = gr.Checkbox(
        value=False,
        label="Compare to Baseline",
        info="If checked, will run two experiments: one with the given configuration, and another with a baseline pytorch configuration.",
    )

    button = gr.Button(value="Run Benchmark", variant="primary")

    with gr.Accordion(label="", open=True):
        html_output = gr.HTML()
        table_output = gr.Dataframe(visible=False)

    button.click(
        fn=run_benchmark,
        inputs={
            experiment,
            baseline,
            model,
            task,
            device,
            backend,
            benchmark,
            *pytorch_config,
            *openvino_config,
            *onnxruntime_config,
            *neural_compressor_config,
            # BUG FIX: the text-generation-inference config components were
            # missing here, so their values never reached run_benchmark even
            # when that backend was selected.
            *text_generation_inference_config,
            *inference_config,
            *training_config,
        },
        outputs=[html_output, button, table_output],
        queue=True,
    )

demo.queue().launch()