IlyasMoutawwakil (HF staff) committed
Commit b71e276
1 parent: 0425d1c

benchmark vs baseline

Adds a "Compare to Baseline" checkbox: when checked, the space runs a second experiment with a baseline PyTorch configuration and reports speedup/savings columns alongside the results. Also adds info tooltips to the UI inputs and refactors the subprocess streaming into a reusable run_experiment generator.

Files changed (2):
1. app.py +14 -1
2. run.py +105 -24
app.py CHANGED
@@ -21,7 +21,7 @@ DEVICES = ["cpu", "cuda"]
 
 with gr.Blocks() as demo:
     # title text
-    gr.HTML("<h1 style='text-align: center'>🤗 Optimum-Benchmark UI 🏋️</h1>")
+    gr.HTML("<h1 style='text-align: center'>🤗 Optimum-Benchmark UI 🏋️</h1>")
     # explanation text
     gr.Markdown(
         "This is a demo space of [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark.git):"
@@ -32,20 +32,24 @@ with gr.Blocks() as demo:
     model = gr.Textbox(
         label="model",
         value="bert-base-uncased",
+        info="Model to run the benchmark on. In the particular case of this space, only models hosted on huggingface.co/models can be benchmarked.",
     )
     task = gr.Dropdown(
         label="task",
         value="text-classification",
         choices=list(TASKS_TO_AUTOMODELS.keys()),
+        info="Task to run the benchmark on. Can be inferred automatically by submitting a model.",
     )
     device = gr.Dropdown(
         value="cpu",
         label="device",
         choices=DEVICES,
+        info="Device to run the benchmark on. Make sure to duplicate the space if you want to run on CUDA devices.",
     )
     experiment = gr.Textbox(
         label="experiment_name",
         value=f"experiment_{random.getrandbits(16)}",
+        info="Name of the experiment. Will be used to create a folder where results are stored.",
     )
     model.submit(fn=infer_task_from_model_name_or_path, inputs=model, outputs=task)
 
@@ -56,6 +60,7 @@ with gr.Blocks() as demo:
         label="backend",
         choices=BACKENDS,
         value=BACKENDS[0],
+        info="Backend to run the benchmark on.",
     )
 
     with gr.Row() as backend_configs:
@@ -81,6 +86,7 @@ with gr.Blocks() as demo:
         label="benchmark",
         choices=BENCHMARKS,
         value=BENCHMARKS[0],
+        info="Type of benchmark to run.",
     )
 
     with gr.Row() as benchmark_configs:
@@ -96,6 +102,12 @@ with gr.Blocks() as demo:
         fn=lambda value: [gr.update(visible=value == key) for key in BENCHMARKS],
     )
 
+    baseline = gr.Checkbox(
+        value=False,
+        label="Compare to Baseline",
+        info="If checked, will run two experiments: one with the given configuration, and another with a baseline PyTorch configuration.",
+    )
+
     button = gr.Button(value="Run Benchmark", variant="primary")
     with gr.Accordion(label="", open=True):
         html_output = gr.HTML()
@@ -105,6 +117,7 @@ with gr.Blocks() as demo:
         fn=run_benchmark,
         inputs={
             experiment,
+            baseline,
             model,
             task,
             device,
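
Note on the `inputs={...}` set above: when a Gradio event listener receives its inputs as a set of components rather than a list, the callback is called with a single dict mapping each component object to its current value, which is what lets run.py dispatch on `key.label`. A minimal sketch of that pattern (component names here are illustrative, not the space's full UI):

import gradio as gr

def run(data):
    # with set-valued `inputs`, `data` maps each component object to its
    # current value, so the handler can dispatch on component labels,
    # the same pattern run_benchmark relies on
    parts = [f"{component.label}={value!r}" for component, value in data.items()]
    return ", ".join(sorted(parts))

with gr.Blocks() as demo:
    model = gr.Textbox(label="model", value="bert-base-uncased")
    baseline = gr.Checkbox(label="Compare to Baseline", value=False)
    parsed = gr.Textbox(label="parsed inputs")
    button = gr.Button("Run")
    button.click(fn=run, inputs={model, baseline}, outputs=parsed)

demo.launch()

This is why adding `baseline` to the set is the only wiring app.py needs; the callback signature stays unchanged.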
run.py CHANGED
@@ -8,9 +8,13 @@ ansi2html_converter = Ansi2HTMLConverter(inline=True)
 
 def run_benchmark(kwargs):
     for key, value in kwargs.copy().items():
-        if key.label == "experiment_name":
+        if key.label == "Compare to Baseline":
+            baseline = value
+            kwargs.pop(key)
+        elif key.label == "experiment_name":
             experiment_name = value
             kwargs.pop(key)
+
         elif key.label == "model":
             model = value
             kwargs.pop(key)
@@ -29,6 +33,37 @@ def run_benchmark(kwargs):
         else:
             continue
 
+    if baseline:
+        baseline_arguments = [
+            "optimum-benchmark",
+            "--config-dir",
+            "./configs",
+            "--config-name",
+            "base_config",
+            f"backend=pytorch",
+            f"task={task}",
+            f"model={model}",
+            f"device={device}",
+            f"benchmark={benchmark}",
+            f"experiment_name={experiment_name}_baseline",
+        ]
+        for component, value in kwargs.items():
+            if f"{benchmark}." in component.label:
+                label = component.label.replace(f"{benchmark}.", "benchmark.")
+                if isinstance(component, gr.Dataframe):
+                    for sub_key, sub_value in zip(component.headers, value[0]):
+                        baseline_arguments.append(f"++{label}.{sub_key}={sub_value}")
+                else:
+                    baseline_arguments.append(f"{label}={value}")
+
+        # yield from run_experiment(baseline_arguments) but get the return code
+        baseline_return_code, html_text = yield from run_experiment(baseline_arguments, "")
+        if baseline_return_code is not None and baseline_return_code != 0:
+            yield gr.update(value=html_text), gr.update(interactive=True), gr.update(visible=False)
+            return
+    else:
+        html_text = ""
+
     arguments = [
         "optimum-benchmark",
         "--config-dir",
@@ -42,7 +77,6 @@ def run_benchmark(kwargs):
         f"benchmark={benchmark}",
         f"experiment_name={experiment_name}",
     ]
-
     for component, value in kwargs.items():
         if f"{backend}." in component.label or f"{benchmark}." in component.label:
             label = component.label.replace(f"{backend}.", "backend.").replace(f"{benchmark}.", "benchmark.")
@@ -53,45 +87,92 @@ def run_benchmark(kwargs):
         else:
             arguments.append(f"{label}={value}")
 
-    command = "<br>".join(arguments)
-    html_text = f"<h3>Running command:</h3>{command}"
+    return_code, html_text = yield from run_experiment(arguments, html_text)
+    if return_code is not None and return_code != 0:
+        yield gr.update(value=html_text), gr.update(interactive=True), gr.update(visible=False)
+        return
+
+    if baseline:
+        baseline_table = pd.read_csv(f"runs/{experiment_name}_baseline/{benchmark}_results.csv", index_col=0)
+        table = pd.read_csv(f"runs/{experiment_name}/{benchmark}_results.csv", index_col=0)
+        # concat tables
+        table = pd.concat([baseline_table, table], axis=0)
+        table["experiment_name"] = [experiment_name + "_baseline", experiment_name]
+        table = table.set_index("experiment_name")
+        table.reset_index(inplace=True)
+        # compute speedups
+        if "forward.latency(s)" in table.columns:
+            table["forward.latency.speedup(%)"] = (
+                table["forward.latency(s)"] / table["forward.latency(s)"].iloc[0] - 1
+            ) * 100
+            table["forward.latency.speedup(%)"] = table["forward.latency.speedup(%)"].round(2)
+        if "forward.throughput(samples/s)" in table.columns:
+            table["forward.throughput.speedup(%)"] = (
+                table["forward.throughput(samples/s)"] / table["forward.throughput(samples/s)"].iloc[0] - 1
+            ) * 100
+            table["forward.throughput.speedup(%)"] = table["forward.throughput.speedup(%)"].round(2)
+        if "forward.peak_memory(MB)" in table.columns:
+            table["forward.peak_memory.savings(%)"] = (
+                table["forward.peak_memory(MB)"] / table["forward.peak_memory(MB)"].iloc[0] - 1
+            ) * 100
+            table["forward.peak_memory.savings(%)"] = table["forward.peak_memory.savings(%)"].round(2)
+        if "generate.latency(s)" in table.columns:
+            table["generate.latency.speedup(%)"] = (
+                table["generate.latency(s)"] / table["generate.latency(s)"].iloc[0] - 1
+            ) * 100
+            table["generate.latency.speedup(%)"] = table["generate.latency.speedup(%)"].round(2)
+        if "generate.throughput(tokens/s)" in table.columns:
+            table["generate.throughput.speedup(%)"] = (
+                table["generate.throughput(tokens/s)"] / table["generate.throughput(tokens/s)"].iloc[0] - 1
+            ) * 100
+            table["generate.throughput.speedup(%)"] = table["generate.throughput.speedup(%)"].round(2)
+        if "generate.peak_memory(MB)" in table.columns:
+            table["generate.peak_memory.savings(%)"] = (
+                table["generate.peak_memory(MB)"] / table["generate.peak_memory(MB)"].iloc[0] - 1
+            ) * 100
+            table["generate.peak_memory.savings(%)"] = table["generate.peak_memory.savings(%)"].round(2)
+
+    else:
+        table = pd.read_csv(f"runs/{experiment_name}/{benchmark}_results.csv", index_col=0)
+
+    table_update = gr.update(visible=True, value={"headers": list(table.columns), "data": table.values.tolist()})
+    yield gr.update(value=html_text), gr.update(interactive=True), table_update
+    return
+
+
+def run_experiment(args, html_text=""):
+    command = "<br>".join(args)
+    html_text += f"<h3>Running command:</h3>{command}"
     yield gr.update(value=html_text), gr.update(interactive=False), gr.update(visible=False)
 
     # stream subprocess output
     process = subprocess.Popen(
-        arguments,
+        args,
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         universal_newlines=True,
     )
 
-    ansi_text = ""
+    curr_ansi_text = ""
     for ansi_line in iter(process.stdout.readline, ""):
         # stream process output to stdout
        print(ansi_line, end="")
-
         # skip torch.distributed.nn.jit.instantiator messages
         if "torch.distributed.nn.jit.instantiator" in ansi_line:
             continue
-        # if the last message is a download message (contains "Downloading ") then remove it and replace it with a new one
-        if "Downloading " in ansi_text and "Downloading " in ansi_line:
-            ansi_text = ansi_text.split("\n")[:-2]
-            print(ansi_text)
-            ansi_text.append(ansi_line)
-            ansi_text = "\n".join(ansi_text)
+        # process download messages
+        if "Downloading " in curr_ansi_text and "Downloading " in ansi_line:
+            curr_ansi_text = curr_ansi_text.split("\n")[:-2]
+            print(curr_ansi_text)
+            curr_ansi_text.append(ansi_line)
+            curr_ansi_text = "\n".join(curr_ansi_text)
         else:
             # append line to ansi text
-            ansi_text += ansi_line
+            curr_ansi_text += ansi_line
         # convert ansi to html
-        html_text = ansi2html_converter.convert(ansi_text)
+        curr_html_text = ansi2html_converter.convert(curr_ansi_text)
         # stream html output to gradio
-        yield gr.update(value=html_text), gr.update(interactive=False), gr.update(visible=False)
+        cumul_html_text = html_text + "<br><h3>Streaming logs:</h3>" + curr_html_text
+        yield gr.update(value=cumul_html_text), gr.update(interactive=False), gr.update(visible=False)
 
-    if process.returncode != 0:
-        table = pd.read_csv(f"runs/{experiment_name}/{benchmark}_results.csv", index_col=0)
-        table_update = gr.update(visible=True, value={"headers": list(table.columns), "data": table.values.tolist()})
-    else:
-        table_update = gr.update(visible=False)
-
-    yield gr.update(value=html_text), gr.update(interactive=True), table_update
-    return
+    return process.returncode, cumul_html_text
 
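The control flow above leans on a Python generator feature (PEP 380): a generator's return statement supplies the value of the `yield from` expression in the delegating generator. run_benchmark therefore re-yields run_experiment's `gr.update` objects to the UI unchanged while still capturing the subprocess exit status. The `is not None` guard exists because `Popen.returncode` stays None until the process has been reaped with wait() or poll(). A self-contained sketch with illustrative names, not code from the repo:

def experiment():
    # stand-in for run_experiment: stream progress, then report status
    yield "log line 1"
    yield "log line 2"
    return 0, "all logs"  # a generator's return value

def benchmark():
    # `yield from` re-yields everything experiment() yields, and the
    # expression itself evaluates to experiment()'s return value
    return_code, logs = yield from experiment()
    if return_code != 0:
        yield "experiment failed"
        return
    yield f"done: {logs}"

print(list(benchmark()))
# ['log line 1', 'log line 2', 'done: all logs']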
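When "Compare to Baseline" is checked, the results table stacks the baseline run as row 0 and the benchmarked run as row 1, and each `*.speedup(%)` / `*.savings(%)` column is the relative change versus row 0: (value / value.iloc[0] - 1) * 100. Note the sign convention this implies: for latency and peak memory a negative percentage means the run beat the baseline, while for throughput an improvement is positive. A worked sketch with made-up numbers:

import pandas as pd

# hypothetical results: baseline run first (row 0), benchmarked run second
table = pd.DataFrame(
    {
        "forward.latency(s)": [0.020, 0.015],
        "forward.throughput(samples/s)": [50.0, 66.7],
    },
    index=["experiment_baseline", "experiment"],
)

# same formula as run.py: percentage change relative to the baseline row
for column in ["forward.latency(s)", "forward.throughput(samples/s)"]:
    metric = column.split("(")[0]  # e.g. "forward.latency"
    table[f"{metric}.speedup(%)"] = (
        (table[column] / table[column].iloc[0] - 1) * 100
    ).round(2)

print(table)
# forward.latency.speedup(%): 0.0 then -25.0 (25% faster than baseline)
# forward.throughput.speedup(%): 0.0 then 33.4 (33.4% more samples/s)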