qiantong-xu
committed on
Commit
•
fdc4461
1
Parent(s):
1307e8d
Update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,7 @@ __all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissi
|
|
4 |
import gradio as gr
|
5 |
import pandas as pd
|
6 |
|
7 |
-
COLUMN_NAMES = ["model", "Tuned on ToolBench", "Open Weather", "The Cat API", "Home Search", "Trip Booking", "Google Sheets", "VirtualHome", "WebShop Long", "WebShop Short", "Tabletop"]
|
8 |
UNTUNED_MODEL_RESULTS = '''[gpt4](https://platform.openai.com/docs/models/gpt-4) & 93.0 & 96.0 & 97.0 & 96.7 & 62.9 & 23.0 / 23.5 & 0.0 & 0.0 & 81.0 \\
|
9 |
[text-davinci-003](https://platform.openai.com/docs/models/gpt-3) & 99.0 & 98.0 & 97.0 & 89.2 & 62.9 & 31.0 / 25.1 & 0.0 & 0.0 & 66.7 \\
|
10 |
[gpt-3.5-turbo](https://platform.openai.com/docs/models/gpt-3-5) & 90.0 & 92.0 & 80.0 & 85.8 & 51.4 & 20.0 / 18.9 & 0.0 & 1.8 & 33.3 \\
|
@@ -56,12 +56,16 @@ def get_baseline_df():
|
|
56 |
for line in lines:
|
57 |
model_results = parse_line(line)
|
58 |
assert len(model_results) == 10
|
|
|
|
|
59 |
model_results.insert(1, "False")
|
60 |
df_data.append(model_results)
|
61 |
lines = TUNED_MODEL_RESULTS.split("\n")
|
62 |
for line in lines:
|
63 |
model_results = parse_line(line)
|
64 |
assert len(model_results) == 10
|
|
|
|
|
65 |
model_results.insert(1, "True")
|
66 |
df_data.append(model_results)
|
67 |
|
@@ -110,7 +114,7 @@ with block:
|
|
110 |
)
|
111 |
with gr.Row():
|
112 |
data = gr.components.Dataframe(
|
113 |
-
type="pandas", datatype=["markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
|
114 |
)
|
115 |
with gr.Row():
|
116 |
data_run = gr.Button("Refresh")
|
|
|
4 |
import gradio as gr
|
5 |
import pandas as pd
|
6 |
|
7 |
+
COLUMN_NAMES = ["model", "Tuned on ToolBench", "Avg.", "Open Weather", "The Cat API", "Home Search", "Trip Booking", "Google Sheets", "VirtualHome", "WebShop Long", "WebShop Short", "Tabletop"]
|
8 |
UNTUNED_MODEL_RESULTS = '''[gpt4](https://platform.openai.com/docs/models/gpt-4) & 93.0 & 96.0 & 97.0 & 96.7 & 62.9 & 23.0 / 23.5 & 0.0 & 0.0 & 81.0 \\
|
9 |
[text-davinci-003](https://platform.openai.com/docs/models/gpt-3) & 99.0 & 98.0 & 97.0 & 89.2 & 62.9 & 31.0 / 25.1 & 0.0 & 0.0 & 66.7 \\
|
10 |
[gpt-3.5-turbo](https://platform.openai.com/docs/models/gpt-3-5) & 90.0 & 92.0 & 80.0 & 85.8 & 51.4 & 20.0 / 18.9 & 0.0 & 1.8 & 33.3 \\
|
|
|
56 |
for line in lines:
|
57 |
model_results = parse_line(line)
|
58 |
assert len(model_results) == 10
|
59 |
+
avg = sum(model_results[1:]) / 9
|
60 |
+
model_results.insert(1, avg)
|
61 |
model_results.insert(1, "False")
|
62 |
df_data.append(model_results)
|
63 |
lines = TUNED_MODEL_RESULTS.split("\n")
|
64 |
for line in lines:
|
65 |
model_results = parse_line(line)
|
66 |
assert len(model_results) == 10
|
67 |
+
avg = sum(model_results[1:]) / 9
|
68 |
+
model_results.insert(1, avg)
|
69 |
model_results.insert(1, "True")
|
70 |
df_data.append(model_results)
|
71 |
|
|
|
114 |
)
|
115 |
with gr.Row():
|
116 |
data = gr.components.Dataframe(
|
117 |
+
type="pandas", datatype=["markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
|
118 |
)
|
119 |
with gr.Row():
|
120 |
data_run = gr.Button("Refresh")
|