"""Gradio front-end for running optimum-benchmark experiments.

On start-up the script installs optimum-benchmark (with the extras matching
the detected hardware), then builds a Blocks UI in which the user picks a
model, task, device, backend and benchmark type and launches ``run_benchmark``.
"""

import os
import random
import subprocess
import sys

_OPTIMUM_BENCHMARK_REPO = "git+https://github.com/huggingface/optimum-benchmark.git"
_COMMON_EXTRAS = "openvino,neural-compressor,diffusers,peft"


def _nvidia_gpu_available() -> bool:
    """Return True when ``nvidia-smi`` can be executed and exits with status 0."""
    try:
        # Output is deliberately not captured so it still shows up in the logs.
        return subprocess.run(["nvidia-smi"], check=False).returncode == 0
    except OSError:
        # Binary missing or not executable: treat as "no GPU" instead of crashing.
        return False


def _pip(*args: str) -> bool:
    """Run ``pip`` for the *current* interpreter; return True on success.

    An argument list is used instead of a shell string so that the ``[extras]``
    part of a requirement is never subject to shell globbing, and
    ``sys.executable -m pip`` guarantees the packages land in the environment
    this script is running in. Failures are reported on stderr but do not
    raise, keeping the bootstrap best-effort.
    """
    completed = subprocess.run([sys.executable, "-m", "pip", *args], check=False)
    if completed.returncode != 0:
        print(
            f"warning: `pip {' '.join(args)}` exited with code {completed.returncode}",
            file=sys.stderr,
        )
    return completed.returncode == 0


def _optimum_benchmark_requirement(onnxruntime_extra: str) -> str:
    """Build the pip requirement string for optimum-benchmark with its extras."""
    return f"optimum-benchmark[{onnxruntime_extra},{_COMMON_EXTRAS}]@{_OPTIMUM_BENCHMARK_REPO}"


nvidia_available = _nvidia_gpu_available()

if nvidia_available:
    _pip("install", _optimum_benchmark_requirement("onnxruntime-gpu"))
    # Remove both onnxruntime flavours, then reinstall only the GPU build.
    # NOTE(review): presumably this avoids the CPU and GPU wheels coexisting — confirm.
    _pip("uninstall", "onnxruntime", "onnxruntime-gpu", "-y")
    _pip("install", "onnxruntime-gpu")
    DEVICES = ["cpu", "cuda"]
else:
    _pip("install", _optimum_benchmark_requirement("onnxruntime"))
    DEVICES = ["cpu"]


# Order matters: the visibility callbacks below pair these entries positionally
# with the config accordions created in the UI.
BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor"]
BENCHMARKS = ["inference", "training"]


# These imports must stay below the installation step above, since the
# packages may not exist before it has run.
import gradio as gr
from optimum_benchmark.task_utils import (
    TASKS_TO_AUTOMODELS,
    infer_task_from_model_name_or_path,
)
from run import run_benchmark
from config_store import (
    get_training_config,
    get_inference_config,
    get_neural_compressor_config,
    get_onnxruntime_config,
    get_openvino_config,
    get_pytorch_config,
)


with gr.Blocks() as demo:
    # add image
    # NOTE(review): this literal is empty in this copy of the file — presumably
    # the image markup was lost; confirm against the deployed app.
    gr.Markdown("""""")

    # title text
    # NOTE(review): the leading/trailing fragments look like stripped HTML tags — confirm.
    gr.Markdown(
        "\n"
        "Note: Duplicate the space and change the hardware settings to an Nvidia machine to target CUDA devices."
        "\n"
    )

    model = gr.Textbox(
        label="model",
        value="distilbert-base-uncased-finetuned-sst-2-english",
        info="Model to run the benchmark on. Press enter to infer the task automatically.",
    )
    task = gr.Dropdown(
        label="task",
        value="text-classification",
        choices=list(TASKS_TO_AUTOMODELS.keys()),
        info="Task to run the benchmark on. Can be infered automatically by submitting a model.",
    )
    device = gr.Dropdown(
        value="cpu",
        label="device",
        choices=DEVICES,
        info="Device to run the benchmark on. make sure to duplicate the space if you wanna run on CUDA devices.",
    )
    experiment = gr.Textbox(
        label="experiment_name",
        # Evaluated once, when the UI is built.
        value=f"awesome-experiment-{random.randint(0, 1000)}",
        info="Name of the experiment. Will be used to create a folder where results are stored.",
    )

    # Submitting the model textbox fills in the task dropdown.
    model.submit(fn=infer_task_from_model_name_or_path, inputs=model, outputs=task)

    with gr.Row():
        with gr.Column():
            with gr.Row():
                backend = gr.Dropdown(
                    label="backend",
                    choices=BACKENDS,
                    value=BACKENDS[0],
                    info="Backend to run the benchmark on.",
                )

            # One accordion per backend, in the same order as BACKENDS; only
            # the first one starts visible.
            with gr.Row() as backend_configs:
                with gr.Accordion(label="Pytorch Config", open=False, visible=True):
                    pytorch_config = get_pytorch_config()
                with gr.Accordion(label="OnnxRunTime Config", open=False, visible=False):
                    onnxruntime_config = get_onnxruntime_config()
                with gr.Accordion(label="OpenVINO Config", open=False, visible=False):
                    openvino_config = get_openvino_config()
                with gr.Accordion(label="Neural Compressor Config", open=False, visible=False):
                    neural_compressor_config = get_neural_compressor_config()

            # hide backend configs based on backend
            backend.change(
                inputs=backend,
                outputs=backend_configs.children,
                fn=lambda value: [gr.update(visible=value == key) for key in BACKENDS],
            )

        with gr.Column():
            with gr.Row():
                benchmark = gr.Dropdown(
                    label="benchmark",
                    choices=BENCHMARKS,
                    value=BENCHMARKS[0],
                    info="Type of benchmark to run.",
                )

            # One accordion per benchmark type, in the same order as BENCHMARKS.
            with gr.Row() as benchmark_configs:
                with gr.Accordion(label="Inference Config", open=False, visible=True):
                    inference_config = get_inference_config()
                with gr.Accordion(label="Training Config", open=False, visible=False):
                    training_config = get_training_config()

            # hide benchmark configs based on benchmark
            benchmark.change(
                inputs=benchmark,
                outputs=benchmark_configs.children,
                fn=lambda value: [gr.update(visible=value == key) for key in BENCHMARKS],
            )

    baseline = gr.Checkbox(
        value=False,
        label="Compare to Baseline",
        info="If checked, will run two experiments: one with the given configuration, and another with a a baseline pytorch configuration.",
    )

    button = gr.Button(value="Run Benchmark", variant="primary")
    with gr.Accordion(label="", open=True):
        html_output = gr.HTML()
        table_output = gr.Dataframe(visible=False)

    button.click(
        fn=run_benchmark,
        # A *set* of components: Gradio then passes the callback a single dict
        # keyed by component. The ``*`` unpacking relies on each ``get_*_config``
        # helper returning an iterable of components.
        inputs={
            experiment,
            baseline,
            model,
            task,
            device,
            backend,
            benchmark,
            *pytorch_config,
            *openvino_config,
            *onnxruntime_config,
            *neural_compressor_config,
            *inference_config,
            *training_config,
        },
        outputs=[html_output, button, table_output],
        queue=True,
    )


demo.queue().launch()