fix missing value
- app.py +5 -3
- src/build.py +1 -1
app.py
CHANGED
@@ -14,6 +14,8 @@ def plot_throughput(bs=1):
    df.loc[df['Models'].str.contains('StarCoder|SantaCoder'), 'color'] = 'orange'
    df.loc[df['Models'].str.contains('CodeGen'), 'color'] = 'pink'
    df.loc[df['Models'].str.contains('Replit'), 'color'] = 'purple'
+   df.loc[df['Models'].str.contains('Wizard'), 'color'] = '#00b3b3'
+   df.loc[df['Models'].str.contains('CodeGeeX'), 'color'] = '#00cc00'

    fig = go.Figure()

@@ -65,7 +67,7 @@ with demo:
            leaderboard_df = gr.components.Dataframe(
                value=df, headers=headers, datatype=["str" for _ in range(len(headers))]
            )
-
+           """
        with gr.TabItem("📊 Performance Plot", id=1):
            with gr.Row():
                bs_1_plot = gr.components.Plot(
@@ -77,7 +79,7 @@ with demo:
                    value=plot_throughput(bs=50),
                    elem_id="bs50-plot",
                    show_label=False,
-               )
+               )"""
        with gr.Row():
            gr.Markdown(
                """Notes:
@@ -85,7 +87,7 @@ with demo:
    <li> Throughputs and peak memory usage are measured using <a href="https://github.com/huggingface/optimum-benchmark/tree/main">Optimum-Benchmark</a> which powers <a href="https://huggingface.co/spaces/optimum/llm-perf-leaderboard">Open LLM-Perf Leaderboard</a>. (0 throughput corresponds to OOM).</li>
    <li> All models were evaluated with the <a href="https://github.com/bigcode-project/bigcode-evaluation-harness/tree/main">bigcode-evaluation-harness</a> with top-p=0.95, temperature=0.2 and n_samples=50.</li>
    <li> HumanEval-Python, reports the pass@1 on HumanEval, the rest is from MultiPL-E benchmark.</li>
-   <li> Average score is the average pass@1 over all languages. For Win Rate, we rank
+   <li> Average score is the average pass@1 over all languages. For Win Rate, we compute model rank for each language as <pre><code>num_models - (rank -1)</code></pre> and average their rankings.</li>
    <li> #Languages column represents the number of programming languages included during the pretraining.
    </ul>"""
    )
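The reworded note above spells out the Win Rate formula: a model's per-language rank is turned into points as num_models - (rank - 1), and those points are averaged across languages, so the best model in a language earns num_models points and the worst earns 1. A minimal pandas sketch of that computation, with made-up model names and scores (nothing below is taken from the leaderboard data):

import pandas as pd

# Toy pass@1 table; models and numbers are illustrative only.
scores = pd.DataFrame(
    {
        "humaneval-python": [33.6, 30.4, 15.1],
        "rust": [21.8, 24.5, 10.2],
    },
    index=["model-a", "model-b", "model-c"],
)

num_models = len(scores)
# rank() with ascending=False assigns rank 1 to the highest pass@1;
# num_models - (rank - 1) converts ranks into points, ties sharing the mean.
points = num_models - (scores.rank(ascending=False, method="average") - 1)
win_rate = points.mean(axis=1)  # average the points across languages
print(win_rate.sort_values(ascending=False))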
src/build.py
CHANGED
@@ -35,7 +35,7 @@ data = {
    "racket": [0.66, 0.07, 11.77, 11.08, 7.87, 3.22, 0, 5.03, 4.07, 10.37, 11.35,13.39],
    "rust": [4.21, 21.84, 24.46, 22.60, 16.32, 15.19, 2.00, 10.24, 7.83, 21.84, 19.94, 33.74],
    "swift": [1.25, 22.74, 16.74, 15.10, 9.98, 5.88, 0.70, 3.92, 1.71, 16.62, 20.81, 27.06],
-   "Throughput (tokens/s) bs=50": [0, 1490.00, 1460.00, 1700.00, 1770.00, 577.00, 2270.00, 2360.00, 687.00, 680.00,
+   "Throughput (tokens/s) bs=50": [0, 1490.00, 1460.00, 1700.00, 1770.00, 577.00, 2270.00, 2360.00, 687.00, 680.00, 0, 1470.00],
    "Peak Memory (MB)": [32890, 33461, 32366, 16512, 8414, 7176, 4602, 4586, 15336, 15336, 0, 32414],
}

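The missing value of the commit title is visible in the removed line: the bs=50 throughput list is shorter than the other columns, which all carry twelve entries (one per model). Assuming src/build.py feeds this dict straight into a pandas DataFrame, which the data = { hunk context suggests but this commit does not show, a short column fails loudly; a minimal sketch of that failure mode:

import pandas as pd

# One column shorter than the rest: pandas refuses to build the frame.
broken = {
    "swift": [1.25, 22.74, 16.74],
    "Throughput (tokens/s) bs=50": [0, 1490.00],  # one value short
}
try:
    pd.DataFrame(broken)
except ValueError as err:
    print(err)  # All arrays must be of the same length

# With every value present (a 0 marks an OOM run, per the notes in
# app.py), each column has one entry per model and the build succeeds.
fixed = {
    "swift": [1.25, 22.74, 16.74],
    "Throughput (tokens/s) bs=50": [0, 1490.00, 1460.00],
}
print(pd.DataFrame(fixed))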