IlyasMoutawwakil (HF staff) committed
Commit b71e276
1 parent: 0425d1c

benchmark vs baseline

Adds a "Compare to Baseline" checkbox: when checked, the space runs a second experiment with a baseline PyTorch configuration and reports speedup/savings columns alongside the results. Also adds info tooltips to the UI inputs and refactors the subprocess streaming into a reusable run_experiment generator.

Files changed (2):
1. app.py +14 -1
2. run.py +105 -24
app.py CHANGED
@@ -21,7 +21,7 @@ DEVICES = ["cpu", "cuda"]
 
 with gr.Blocks() as demo:
     # title text
-    gr.HTML("<h1 style='text-align: center'>🤗 Optimum-Benchmark UI 🏋️</h1>")
+    gr.HTML("<h1 style='text-align: center'>🤗 Optimum-Benchmark UI 🏋️</h1>")
     # explanation text
     gr.Markdown(
         "This is a demo space of [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark.git):"
@@ -32,20 +32,24 @@ with gr.Blocks() as demo:
     model = gr.Textbox(
         label="model",
         value="bert-base-uncased",
+        info="Model to run the benchmark on. In the particular case of this space, only models hosted on huggingface.co/models can be benchmarked.",
     )
     task = gr.Dropdown(
         label="task",
         value="text-classification",
         choices=list(TASKS_TO_AUTOMODELS.keys()),
+        info="Task to run the benchmark on. Can be inferred automatically by submitting a model.",
     )
     device = gr.Dropdown(
         value="cpu",
         label="device",
         choices=DEVICES,
+        info="Device to run the benchmark on. Make sure to duplicate the space if you want to run on CUDA devices.",
     )
     experiment = gr.Textbox(
         label="experiment_name",
         value=f"experiment_{random.getrandbits(16)}",
+        info="Name of the experiment. Will be used to create a folder where results are stored.",
     )
     model.submit(fn=infer_task_from_model_name_or_path, inputs=model, outputs=task)
 
@@ -56,6 +60,7 @@ with gr.Blocks() as demo:
         label="backend",
         choices=BACKENDS,
         value=BACKENDS[0],
+        info="Backend to run the benchmark on.",
     )
 
     with gr.Row() as backend_configs:
@@ -81,6 +86,7 @@ with gr.Blocks() as demo:
         label="benchmark",
         choices=BENCHMARKS,
         value=BENCHMARKS[0],
+        info="Type of benchmark to run.",
     )
 
     with gr.Row() as benchmark_configs:
@@ -96,6 +102,12 @@ with gr.Blocks() as demo:
         fn=lambda value: [gr.update(visible=value == key) for key in BENCHMARKS],
     )
 
+    baseline = gr.Checkbox(
+        value=False,
+        label="Compare to Baseline",
+        info="If checked, will run two experiments: one with the given configuration, and another with a baseline PyTorch configuration.",
+    )
+
     button = gr.Button(value="Run Benchmark", variant="primary")
     with gr.Accordion(label="", open=True):
         html_output = gr.HTML()
@@ -105,6 +117,7 @@ with gr.Blocks() as demo:
         fn=run_benchmark,
         inputs={
             experiment,
+            baseline,
             model,
             task,
             device,
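
Note on the `inputs={...}` set above: when a Gradio event listener receives its inputs as a set of components rather than a list, the callback is called with a single dict mapping each component object to its current value, which is what lets run.py dispatch on `key.label`. A minimal sketch of that pattern (component names here are illustrative, not the space's full UI):

import gradio as gr

def run(data):
    # with set-valued `inputs`, `data` maps each component object to its
    # current value, so the handler can dispatch on component labels,
    # the same pattern run_benchmark relies on
    parts = [f"{component.label}={value!r}" for component, value in data.items()]
    return ", ".join(sorted(parts))

with gr.Blocks() as demo:
    model = gr.Textbox(label="model", value="bert-base-uncased")
    baseline = gr.Checkbox(label="Compare to Baseline", value=False)
    parsed = gr.Textbox(label="parsed inputs")
    button = gr.Button("Run")
    button.click(fn=run, inputs={model, baseline}, outputs=parsed)

demo.launch()

This is why adding `baseline` to the set is the only wiring app.py needs; the callback signature stays unchanged.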
run.py CHANGED
@@ -8,9 +8,13 @@ ansi2html_converter = Ansi2HTMLConverter(inline=True)
 
 def run_benchmark(kwargs):
     for key, value in kwargs.copy().items():
-        if key.label == "experiment_name":
+        if key.label == "Compare to Baseline":
+            baseline = value
+            kwargs.pop(key)
+        elif key.label == "experiment_name":
             experiment_name = value
             kwargs.pop(key)
+
         elif key.label == "model":
             model = value
             kwargs.pop(key)
@@ -29,6 +33,37 @@ def run_benchmark(kwargs):
         else:
             continue
 
+    if baseline:
+        baseline_arguments = [
+            "optimum-benchmark",
+            "--config-dir",
+            "./configs",
+            "--config-name",
+            "base_config",
+            f"backend=pytorch",
+            f"task={task}",
+            f"model={model}",
+            f"device={device}",
+            f"benchmark={benchmark}",
+            f"experiment_name={experiment_name}_baseline",
+        ]
+        for component, value in kwargs.items():
+            if f"{benchmark}." in component.label:
+                label = component.label.replace(f"{benchmark}.", "benchmark.")
+                if isinstance(component, gr.Dataframe):
+                    for sub_key, sub_value in zip(component.headers, value[0]):
+                        baseline_arguments.append(f"++{label}.{sub_key}={sub_value}")
+                else:
+                    baseline_arguments.append(f"{label}={value}")
+
+        # yield from run_experiment(baseline_arguments) but get the return code
+        baseline_return_code, html_text = yield from run_experiment(baseline_arguments, "")
+        if baseline_return_code is not None and baseline_return_code != 0:
+            yield gr.update(value=html_text), gr.update(interactive=True), gr.update(visible=False)
+            return
+    else:
+        html_text = ""
+
     arguments = [
         "optimum-benchmark",
         "--config-dir",
@@ -42,7 +77,6 @@ def run_benchmark(kwargs):
         f"benchmark={benchmark}",
         f"experiment_name={experiment_name}",
     ]
-
     for component, value in kwargs.items():
         if f"{backend}." in component.label or f"{benchmark}." in component.label:
             label = component.label.replace(f"{backend}.", "backend.").replace(f"{benchmark}.", "benchmark.")
@@ -53,45 +87,92 @@ def run_benchmark(kwargs):
         else:
             arguments.append(f"{label}={value}")
 
-    command = "<br>".join(arguments)
-    html_text = f"<h3>Running command:</h3>{command}"
+    return_code, html_text = yield from run_experiment(arguments, html_text)
+    if return_code is not None and return_code != 0:
+        yield gr.update(value=html_text), gr.update(interactive=True), gr.update(visible=False)
+        return
+
+    if baseline:
+        baseline_table = pd.read_csv(f"runs/{experiment_name}_baseline/{benchmark}_results.csv", index_col=0)
+        table = pd.read_csv(f"runs/{experiment_name}/{benchmark}_results.csv", index_col=0)
+        # concat tables
+        table = pd.concat([baseline_table, table], axis=0)
+        table["experiment_name"] = [experiment_name + "_baseline", experiment_name]
+        table = table.set_index("experiment_name")
+        table.reset_index(inplace=True)
+        # compute speedups
+        if "forward.latency(s)" in table.columns:
+            table["forward.latency.speedup(%)"] = (
+                table["forward.latency(s)"] / table["forward.latency(s)"].iloc[0] - 1
+            ) * 100
+            table["forward.latency.speedup(%)"] = table["forward.latency.speedup(%)"].round(2)
+        if "forward.throughput(samples/s)" in table.columns:
+            table["forward.throughput.speedup(%)"] = (
+                table["forward.throughput(samples/s)"] / table["forward.throughput(samples/s)"].iloc[0] - 1
+            ) * 100
+            table["forward.throughput.speedup(%)"] = table["forward.throughput.speedup(%)"].round(2)
+        if "forward.peak_memory(MB)" in table.columns:
+            table["forward.peak_memory.savings(%)"] = (
+                table["forward.peak_memory(MB)"] / table["forward.peak_memory(MB)"].iloc[0] - 1
+            ) * 100
+            table["forward.peak_memory.savings(%)"] = table["forward.peak_memory.savings(%)"].round(2)
+        if "generate.latency(s)" in table.columns:
+            table["generate.latency.speedup(%)"] = (
+                table["generate.latency(s)"] / table["generate.latency(s)"].iloc[0] - 1
+            ) * 100
+            table["generate.latency.speedup(%)"] = table["generate.latency.speedup(%)"].round(2)
+        if "generate.throughput(tokens/s)" in table.columns:
+            table["generate.throughput.speedup(%)"] = (
+                table["generate.throughput(tokens/s)"] / table["generate.throughput(tokens/s)"].iloc[0] - 1
+            ) * 100
+            table["generate.throughput.speedup(%)"] = table["generate.throughput.speedup(%)"].round(2)
+        if "generate.peak_memory(MB)" in table.columns:
+            table["generate.peak_memory.savings(%)"] = (
+                table["generate.peak_memory(MB)"] / table["generate.peak_memory(MB)"].iloc[0] - 1
+            ) * 100
+            table["generate.peak_memory.savings(%)"] = table["generate.peak_memory.savings(%)"].round(2)
+
+    else:
+        table = pd.read_csv(f"runs/{experiment_name}/{benchmark}_results.csv", index_col=0)
+
+    table_update = gr.update(visible=True, value={"headers": list(table.columns), "data": table.values.tolist()})
+    yield gr.update(value=html_text), gr.update(interactive=True), table_update
+    return
+
+
+def run_experiment(args, html_text=""):
+    command = "<br>".join(args)
+    html_text += f"<h3>Running command:</h3>{command}"
     yield gr.update(value=html_text), gr.update(interactive=False), gr.update(visible=False)
 
     # stream subprocess output
     process = subprocess.Popen(
-        arguments,
+        args,
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         universal_newlines=True,
     )
 
-    ansi_text = ""
+    curr_ansi_text = ""
     for ansi_line in iter(process.stdout.readline, ""):
         # stream process output to stdout
        print(ansi_line, end="")
-
         # skip torch.distributed.nn.jit.instantiator messages
         if "torch.distributed.nn.jit.instantiator" in ansi_line:
             continue
-        # if the last message is a download message (contains "Downloading ") then remove it and replace it with a new one
-        if "Downloading " in ansi_text and "Downloading " in ansi_line:
-            ansi_text = ansi_text.split("\n")[:-2]
-            print(ansi_text)
-            ansi_text.append(ansi_line)
-            ansi_text = "\n".join(ansi_text)
+        # process download messages
+        if "Downloading " in curr_ansi_text and "Downloading " in ansi_line:
+            curr_ansi_text = curr_ansi_text.split("\n")[:-2]
+            print(curr_ansi_text)
+            curr_ansi_text.append(ansi_line)
+            curr_ansi_text = "\n".join(curr_ansi_text)
         else:
             # append line to ansi text
-            ansi_text += ansi_line
+            curr_ansi_text += ansi_line
         # convert ansi to html
-        html_text = ansi2html_converter.convert(ansi_text)
+        curr_html_text = ansi2html_converter.convert(curr_ansi_text)
         # stream html output to gradio
-        yield gr.update(value=html_text), gr.update(interactive=False), gr.update(visible=False)
+        cumul_html_text = html_text + "<br><h3>Streaming logs:</h3>" + curr_html_text
+        yield gr.update(value=cumul_html_text), gr.update(interactive=False), gr.update(visible=False)
 
-    if process.returncode != 0:
-        table = pd.read_csv(f"runs/{experiment_name}/{benchmark}_results.csv", index_col=0)
-        table_update = gr.update(visible=True, value={"headers": list(table.columns), "data": table.values.tolist()})
-    else:
-        table_update = gr.update(visible=False)
-
-    yield gr.update(value=html_text), gr.update(interactive=True), table_update
-    return
+    return process.returncode, cumul_html_text
 
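The control flow above leans on a Python generator feature (PEP 380): a generator's return statement supplies the value of the `yield from` expression in the delegating generator. run_benchmark therefore re-yields run_experiment's `gr.update` objects to the UI unchanged while still capturing the subprocess exit status. The `is not None` guard exists because `Popen.returncode` stays None until the process has been reaped with wait() or poll(). A self-contained sketch with illustrative names, not code from the repo:

def experiment():
    # stand-in for run_experiment: stream progress, then report status
    yield "log line 1"
    yield "log line 2"
    return 0, "all logs"  # a generator's return value

def benchmark():
    # `yield from` re-yields everything experiment() yields, and the
    # expression itself evaluates to experiment()'s return value
    return_code, logs = yield from experiment()
    if return_code != 0:
        yield "experiment failed"
        return
    yield f"done: {logs}"

print(list(benchmark()))
# ['log line 1', 'log line 2', 'done: all logs']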
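When "Compare to Baseline" is checked, the results table stacks the baseline run as row 0 and the benchmarked run as row 1, and each `*.speedup(%)` / `*.savings(%)` column is the relative change versus row 0: (value / value.iloc[0] - 1) * 100. Note the sign convention this implies: for latency and peak memory a negative percentage means the run beat the baseline, while for throughput an improvement is positive. A worked sketch with made-up numbers:

import pandas as pd

# hypothetical results: baseline run first (row 0), benchmarked run second
table = pd.DataFrame(
    {
        "forward.latency(s)": [0.020, 0.015],
        "forward.throughput(samples/s)": [50.0, 66.7],
    },
    index=["experiment_baseline", "experiment"],
)

# same formula as run.py: percentage change relative to the baseline row
for column in ["forward.latency(s)", "forward.throughput(samples/s)"]:
    metric = column.split("(")[0]  # e.g. "forward.latency"
    table[f"{metric}.speedup(%)"] = (
        (table[column] / table[column].iloc[0] - 1) * 100
    ).round(2)

print(table)
# forward.latency.speedup(%): 0.0 then -25.0 (25% faster than baseline)
# forward.throughput.speedup(%): 0.0 then 33.4 (33.4% more samples/s)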