Commit 7bb9bae ("various")
Parent(s): 04d9934
app.py
CHANGED
@@ -1,18 +1,24 @@
 import os
+import subprocess
+import importlib.util
 
-
-
-
-        "pip install optimum-benchmark[onnxruntime-gpu,openvino,neural-compressor,diffusers,peft]@git+https://github.com/huggingface/optimum-benchmark.git"
-    )
-    os.system("pip uninstall onnxruntime onnxruntime-gpu -y")
-    os.system("pip install onnxruntime-gpu")
+NVIDIA_AVAILABLE = subprocess.check_output("nvidia-smi").decode("utf-8").strip() != ""
+
+if NVIDIA_AVAILABLE:
     DEVICES = ["cpu", "cuda"]
+    if importlib.util.find_spec("optimum_benchmark") is None:
+        os.system(
+            "pip install optimum-benchmark[onnxruntime-gpu,openvino,neural-compressor,diffusers,peft]@git+https://github.com/huggingface/optimum-benchmark.git"
+        )
+        os.system("pip uninstall onnxruntime onnxruntime-gpu -y")
+        os.system("pip install onnxruntime-gpu")
 else:
-    os.system(
-        "pip install optimum-benchmark[onnxruntime,openvino,neural-compressor,diffusers,peft]@git+https://github.com/huggingface/optimum-benchmark.git"
-    )
     DEVICES = ["cpu"]
+    if importlib.util.find_spec("optimum_benchmark") is None:
+        os.system(
+            "pip install optimum-benchmark[onnxruntime,openvino,neural-compressor,diffusers,peft]@git+https://github.com/huggingface/optimum-benchmark.git"
+        )
+
 
 BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor"]
 BENCHMARKS = ["inference", "training"]

@@ -48,7 +54,8 @@ with gr.Blocks() as demo:
         "Zero code Gradio interface of <a href='https://github.com/huggingface/optimum-benchmark.git'>Optimum-Benchmark</a><br>"
         "</h3>"
         "<p style='text-align: center'>"
-        "Note: Duplicate
+        "Note: <a href='https://huggingface.co/spaces/optimum/optimum-benchmark-ui?duplicate=true'>Duplicate this space</a> and change its hardware to enable CUDA device<br>"
+        "or <a href='https://huggingface.co/spaces/optimum/optimum-benchmark-ui?docker=true'>Run with Docker</a> locally to target your own hardware."
         "</p>"
     )
 

@@ -71,7 +78,7 @@ with gr.Blocks() as demo:
     )
     experiment = gr.Textbox(
         label="experiment_name",
-        value=f"awesome-experiment-{random.randint(0,
+        value=f"awesome-experiment-{random.randint(0, 100000)}",
         info="Name of the experiment. Will be used to create a folder where results are stored.",
     )
     model.submit(fn=infer_task_from_model_name_or_path, inputs=model, outputs=task)

@@ -87,13 +94,13 @@ with gr.Blocks() as demo:
     )
 
     with gr.Row() as backend_configs:
-        with gr.Accordion(label="
+        with gr.Accordion(label="backend options", open=False, visible=True):
             pytorch_config = get_pytorch_config()
-        with gr.Accordion(label="
+        with gr.Accordion(label="backend config", open=False, visible=False):
             onnxruntime_config = get_onnxruntime_config()
-        with gr.Accordion(label="
+        with gr.Accordion(label="backend config", open=False, visible=False):
             openvino_config = get_openvino_config()
-        with gr.Accordion(label="
+        with gr.Accordion(label="backend config", open=False, visible=False):
             neural_compressor_config = get_neural_compressor_config()
 
     # hide backend configs based on backend

@@ -113,9 +120,9 @@ with gr.Blocks() as demo:
     )
 
     with gr.Row() as benchmark_configs:
-        with gr.Accordion(label="
+        with gr.Accordion(label="benchmark Config", open=False, visible=True):
             inference_config = get_inference_config()
-        with gr.Accordion(label="
+        with gr.Accordion(label="benchmark Config", open=False, visible=False):
             training_config = get_training_config()
 
     # hide benchmark configs based on benchmark

@@ -127,8 +134,8 @@ with gr.Blocks() as demo:
 
     baseline = gr.Checkbox(
         value=False,
-        label="
-        info="
+        label="compare_to_baseline",
+        info="Check this box to compare your chosen configuration to the baseline configuration.",
     )
 
     button = gr.Button(value="Run Benchmark", variant="primary")

@@ -156,5 +163,11 @@ with gr.Blocks() as demo:
         outputs=[html_output, button, table_output],
         queue=True,
     )
+    button.click(
+        fn=lambda: f"awesome-experiment-{random.randint(0, 100000)}",
+        inputs=[],
+        outputs=experiment,
+        queue=True,
+    )
 
 demo.queue().launch()
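The new startup path in app.py probes for a GPU once with `subprocess.check_output("nvidia-smi")` and only installs `optimum-benchmark` when `importlib.util.find_spec("optimum_benchmark")` returns None, so a restarted Space skips the reinstall if the package is already present. Note that `check_output` raises `FileNotFoundError` when the `nvidia-smi` binary is missing and `CalledProcessError` when it exits non-zero, so the CPU-only `else:` branch is only reached when the command runs successfully and prints nothing. A more defensive probe, shown purely as a sketch (it is not part of this commit), could swallow both failure modes:

# Sketch only, not from this commit: a GPU probe that degrades to CPU instead of
# raising when the nvidia-smi binary is missing or exits with an error.
import shutil
import subprocess


def nvidia_available() -> bool:
    if shutil.which("nvidia-smi") is None:
        return False  # driver/CLI not installed at all
    try:
        out = subprocess.check_output(["nvidia-smi"]).decode("utf-8")
    except subprocess.CalledProcessError:
        return False  # binary present but no usable device
    return out.strip() != ""


NVIDIA_AVAILABLE = nvidia_available()
DEVICES = ["cpu", "cuda"] if NVIDIA_AVAILABLE else ["cpu"]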
run.py
CHANGED
@@ -8,13 +8,12 @@ ansi2html_converter = Ansi2HTMLConverter(inline=True)
 
 def run_benchmark(kwargs):
     for key, value in kwargs.copy().items():
-        if key.label == "
+        if key.label == "compare_to_baseline":
             baseline = value
             kwargs.pop(key)
         elif key.label == "experiment_name":
             experiment_name = value
             kwargs.pop(key)
-
         elif key.label == "model":
             model = value
             kwargs.pop(key)

@@ -45,7 +44,7 @@ def run_benchmark(kwargs):
         f"model={model}",
         f"device={device}",
         f"benchmark={benchmark}",
-        f"experiment_name=
+        f"experiment_name=baseline_{experiment_name}",
     ]
     for component, value in kwargs.items():
         if f"{benchmark}." in component.label:

@@ -93,7 +92,7 @@ def run_benchmark(kwargs):
         return
 
     if baseline:
-        baseline_table = pd.read_csv(f"runs/
+        baseline_table = pd.read_csv(f"runs/baseline_{experiment_name}/{benchmark}_results.csv", index_col=0)
         table = pd.read_csv(f"runs/{experiment_name}/{benchmark}_results.csv", index_col=0)
         # concat tables
         table = pd.concat([baseline_table, table], axis=0)

@@ -121,6 +120,9 @@ def run_experiment(args, html_text=""):
 
     curr_ansi_text = ""
     for ansi_line in iter(process.stdout.readline, ""):
+        if process.returncode is not None and process.returncode != 0:
+            break
+
         # stream process output to stdout
         print(ansi_line, end="")
         # skip torch.distributed.nn.jit.instantiator messages

@@ -164,7 +166,7 @@ def postprocess_table(table, experiment_name):
         table["forward.peak_memory.reduction(%)"] = (
             table["forward.peak_memory(MB)"] / table["forward.peak_memory(MB)"].iloc[0] - 1
         ) * 100
-        table["forward.peak_memory.reduction(%)"] = table["forward.peak_memory.
+        table["forward.peak_memory.reduction(%)"] = table["forward.peak_memory.reduction(%)"].round(2)
 
     if "generate.latency(s)" in table.columns:
         table["generate.latency.reduction(%)"] = (
|