sherzod-hakimov committed
Commit 18d5ac3
Parent(s): 69c36b6
update

Browse files:
- app.py +2 -1
- requirements.txt +1 -1
- src/leaderboard_utils.py +27 -16
app.py
CHANGED
@@ -8,7 +8,7 @@ from src.plot_utils import split_models, compare_plots
 dataframe_height = 800 # Height of the table in pixels
 # Get CSV data
 global primary_leaderboard_df, version_dfs, version_names
-primary_leaderboard_df, version_dfs, version_names = get_github_data()
+primary_leaderboard_df, version_dfs, version_names, date = get_github_data()
 
 global prev_df
 prev_df = version_dfs[0]
@@ -48,6 +48,7 @@ with main_app:
     )
 
     gr.HTML(CLEMSCORE_TEXT)
+    gr.HTML(f"Last updated - {date}")
 
     # Add a dummy leaderboard to handle search queries from the primary_leaderboard_df and not update primary_leaderboard_df
     dummy_leaderboard_table = gr.Dataframe(
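For context, a minimal sketch of how the extra return value is consumed in a Gradio Blocks app. The layout below is illustrative, not the actual app.py; only get_github_data and the "Last updated" banner come from this diff, and passing dataframe_height to gr.Dataframe is an assumption:

    import gradio as gr
    from src.leaderboard_utils import get_github_data

    # get_github_data() now also returns the formatted date of the latest benchmark run
    primary_leaderboard_df, version_dfs, version_names, date = get_github_data()

    dataframe_height = 800  # Height of the table in pixels

    with gr.Blocks() as main_app:
        gr.HTML(f"Last updated - {date}")  # banner added by this commit
        # latest_df is documented as a one-element list, hence the [0]
        gr.Dataframe(value=primary_leaderboard_df[0], height=dataframe_height)

    main_app.launch()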
requirements.txt
CHANGED
@@ -1,3 +1,3 @@
-gradio==
+gradio==4.36.1
 pandas==2.0.0
 plotly==5.18.0
src/leaderboard_utils.py
CHANGED
@@ -2,17 +2,19 @@ import os
 import pandas as pd
 import requests, json
 from io import StringIO
+from datetime import datetime
+
 
 def get_github_data():
-
+    """
     Get data from csv files on Github
     Args:
         None
     Returns:
         latest_df: singular list containing dataframe of the latest version of the leaderboard with only 4 columns
         all_dfs: list of dataframes for previous versions + latest version including columns for all games
         all_vnames: list of the names for the previous versions + latest version (For Details and Versions Tab Dropdown)
-
+    """
     uname = "clembench"
     repo = "clembench-runs"
     json_url = f"https://raw.githubusercontent.com/{uname}/{repo}/main/benchmark_runs.json"
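The function reads everything straight from raw.githubusercontent.com; a minimal sketch of that fetch pattern, assuming the registry JSON lists versions with 'version', 'date' and 'result_file' fields as the code below implies (the CSV path shown here is made up):

    import requests
    import pandas as pd
    from io import StringIO

    json_url = "https://raw.githubusercontent.com/clembench/clembench-runs/main/benchmark_runs.json"
    resp = requests.get(json_url)
    if resp.status_code == 200:
        registry = resp.json()  # registry of benchmark runs (exact schema assumed)

    # Hypothetical results file for one version; the real path comes from 'result_file'
    csv_url = "https://raw.githubusercontent.com/clembench/clembench-runs/main/v1.0/results.csv"
    resp = requests.get(csv_url)
    if resp.status_code == 200:
        df = pd.read_csv(StringIO(resp.text))  # parse the CSV in memory, no temp file
    else:
        print(f"Failed to read CSV file. Status Code : {resp.status_code}")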
@@ -27,11 +29,18 @@ def get_github_data():
         csv_path = ver['result_file'].split('/')[1:]
         csv_path = '/'.join(csv_path)
 
-        #Sort by latest version
+        # Sort by latest version
         float_content = [float(s[1:]) for s in version_names]
         float_content.sort(reverse=True)
         version_names = ['v'+str(s) for s in float_content]
 
+        # Get date of latest version
+        for data in versions:
+            if data['version'] == version_names[0]:
+                date = data['date']  # Should be in YYYY/MM/DD format
+                date_obj = datetime.strptime(date, "%Y/%m/%d")
+                date = date_obj.strftime("%d %b %Y")
+
         DFS = []
         for version in version_names:
             result_url = csv_url+ version + '/' + csv_path
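The two transformations added in this hunk are easy to check in isolation; a quick sketch with made-up values (real names and dates come from benchmark_runs.json):

    from datetime import datetime

    # Versions such as "v1.5" are ordered numerically by stripping the leading "v"
    version_names = ["v0.9", "v1.5", "v1.0"]  # hypothetical registry entries
    floats = sorted((float(s[1:]) for s in version_names), reverse=True)
    version_names = ['v' + str(f) for f in floats]  # ['v1.5', 'v1.0', 'v0.9']

    # The latest version's date is re-formatted from "YYYY/MM/DD" for display
    date_obj = datetime.strptime("2024/06/14", "%Y/%m/%d")  # example date only
    print(date_obj.strftime("%d %b %Y"))  # prints "14 Jun 2024"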
@@ -44,7 +53,7 @@ def get_github_data():
             else:
                 print(f"Failed to read CSV file for version : {version}. Status Code : {resp.status_code}")
 
-        # Only keep
+        # Only keep relevant columns for the main leaderboard
         latest_df_dummy = DFS[0]
         all_columns = list(latest_df_dummy.columns)
         keep_columns = all_columns[0:4]
@@ -56,22 +65,23 @@ def get_github_data():
         for df, name in zip(DFS, version_names):
             all_dfs.append(df)
             all_vnames.append(name)
-        return latest_df, all_dfs, all_vnames
+        return latest_df, all_dfs, all_vnames, date
 
     else:
         print(f"Failed to read JSON file: Status Code : {resp.status_code}")
 
+
 def process_df(df: pd.DataFrame) -> pd.DataFrame:
-
+    """
     Process dataframe
     - Remove repition in model names
     - Convert datatypes to sort by "float" instead of "str" for sorting
     - Update column names
     Args:
         df: Unprocessed Dataframe (after using update_cols)
     Returns:
         df: Processed Dataframe
-
+    """
 
     # Change column type to float from str
     list_column_names = list(df.columns)
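A small illustration of the cleanup steps process_df() documents, with made-up column names and values; the "--" split is one plausible reading of "remove repetition in model names", not the repo's actual rule:

    import pandas as pd

    df = pd.DataFrame({"model": ["gpt-4--gpt-4", "claude--claude"],  # hypothetical raw rows
                       "clemscore": ["55.3", "41.2"]})               # scores arrive as strings
    df["clemscore"] = df["clemscore"].astype(float)       # sort numerically, not lexicographically
    df["model"] = df["model"].str.split("--").str[0]      # drop the repeated model name (assumed format)
    df = df.rename(columns={"model": "Model", "clemscore": "Clemscore"})  # update column names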
@@ -107,15 +117,16 @@ def process_df(df: pd.DataFrame) -> pd.DataFrame:
     df = df.rename(columns=map_cols)
     return df
 
+
 def filter_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
-
+    """
     Filter the dataframe based on the search query
     Args:
         df: Unfiltered dataframe
         query: a string of queries separated by ";"
     Return:
         filtered_df: Dataframe containing searched queries in the 'Model' column
-
+    """
     queries = query.split(';')
     list_cols = list(df.columns)
     df_len = len(df)
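The docstring above states the search contract; a minimal sketch of that behaviour, assuming a simple substring match on the 'Model' column (filter_models and its matching rule are illustrative, not the repo's filter_search implementation):

    import pandas as pd

    def filter_models(df: pd.DataFrame, query: str) -> pd.DataFrame:
        if query == "":
            return df
        terms = [q.strip().lower() for q in query.split(';') if q.strip()]
        # keep rows whose 'Model' value contains any of the ";"-separated terms
        mask = df['Model'].str.lower().apply(lambda name: any(t in name for t in terms))
        return df[mask]

    # e.g. filter_models(leaderboard_df, "gpt-4; claude") keeps rows mentioning either model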
@@ -134,4 +145,4 @@ def filter_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
     if query == "":
         return df
 
-    return filtered_df
+    return filtered_df