sherzod-hakimov
committed on
Upload 3 files
Browse files
- src/leaderboard_utils.py +20 -34
- src/plot_utils.py +8 -13
- src/version_utils.py +9 -30
src/leaderboard_utils.py
CHANGED
@@ -29,56 +29,42 @@ def get_github_data():
|
|
29 |
json_data = response.json()
|
30 |
versions = json_data['versions']
|
31 |
|
32 |
-
# Sort version names - latest first
|
33 |
version_names = sorted(
|
34 |
[ver['version'] for ver in versions],
|
35 |
-
key=lambda v:
|
36 |
reverse=True
|
37 |
-
)
|
38 |
-
print(f"Found {len(version_names)} versions from get_github_data(): {version_names}.")
|
39 |
|
40 |
# Get Last updated date of the latest version
|
41 |
latest_version = version_names[0]
|
42 |
latest_date = next(
|
43 |
ver['date'] for ver in versions if ver['version'] == latest_version
|
44 |
)
|
45 |
-
formatted_date = datetime.strptime(latest_date, "%Y
|
46 |
|
47 |
# Get Leaderboard data - for text-only + multimodal
|
48 |
github_data = {}
|
49 |
|
50 |
-
# Collect Dataframes
|
51 |
-
text_dfs = []
|
52 |
mm_dfs = []
|
53 |
-
|
|
|
54 |
for version in version_names:
|
55 |
-
#
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
if
|
60 |
-
df = pd.read_csv(StringIO(
|
61 |
df = process_df(df)
|
62 |
-
df = df.sort_values(by=df.columns[1], ascending=False)
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
mm_url = f"{base_repo}{version}_multimodal/results.csv"
|
70 |
-
mm_response = requests.get(mm_url)
|
71 |
-
if mm_response.status_code == 200:
|
72 |
-
df = pd.read_csv(StringIO(mm_response.text))
|
73 |
-
df = process_df(df)
|
74 |
-
df = df.sort_values(by=df.columns[1], ascending=False) # Sort by clemscore column
|
75 |
-
mm_dfs.append(df)
|
76 |
-
else:
|
77 |
-
print(f"Failed to read multimodal leaderboard CSV file for version: {version}: Status Code: {csv_response.status_code}. Please ignore this message if multimodal results are not available for this version")
|
78 |
-
|
79 |
-
github_data["text"] = text_dfs
|
80 |
github_data["multimodal"] = mm_dfs
|
81 |
-
github_data["date"] =
|
82 |
|
83 |
return github_data
|
84 |
|
@@ -136,4 +122,4 @@ def query_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
|
|
136 |
# Filter dataframe based on queries in 'Model' column
|
137 |
filtered_df = df[df['Model'].str.lower().str.contains('|'.join(queries))]
|
138 |
|
139 |
-
return filtered_df
|
|
|
29 |
json_data = response.json()
|
30 |
versions = json_data['versions']
|
31 |
|
|
|
32 |
version_names = sorted(
|
33 |
[ver['version'] for ver in versions],
|
34 |
+
key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))), # {{ edit_1 }}: Corrected slicing to handle 'v' prefix
|
35 |
reverse=True
|
36 |
+
)
|
|
|
37 |
|
38 |
# Get Last updated date of the latest version
|
39 |
latest_version = version_names[0]
|
40 |
latest_date = next(
|
41 |
ver['date'] for ver in versions if ver['version'] == latest_version
|
42 |
)
|
43 |
+
formatted_date = datetime.strptime(latest_date, "%Y-%m-%d").strftime("%d %b %Y") # {{ edit_1 }}: Updated date format
|
44 |
|
45 |
# Get Leaderboard data - for text-only + multimodal
|
46 |
github_data = {}
|
47 |
|
|
|
|
|
48 |
mm_dfs = []
|
49 |
+
mm_date = ""
|
50 |
+
mm_flag = True
|
51 |
for version in version_names:
|
52 |
+
# Check if version ends with 'multimodal' before constructing the URL
|
53 |
+
mm_suffix = "_multimodal" if not version.endswith('multimodal') else ""
|
54 |
+
mm_url = f"{base_repo}{version}{mm_suffix}/results.csv" # {{ edit_1 }}: Conditional suffix for multimodal
|
55 |
+
mm_response = requests.get(mm_url)
|
56 |
+
if mm_response.status_code == 200:
|
57 |
+
df = pd.read_csv(StringIO(mm_response.text))
|
58 |
df = process_df(df)
|
59 |
+
df = df.sort_values(by=df.columns[1], ascending=False) # Sort by clemscore column
|
60 |
+
mm_dfs.append(df)
|
61 |
+
if mm_flag:
|
62 |
+
mm_date = next(ver['date'] for ver in versions if ver['version'] == version)
|
63 |
+
mm_date = datetime.strptime(mm_date, "%Y-%m-%d").strftime("%d %b %Y")
|
64 |
+
mm_flag = False
|
65 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
github_data["multimodal"] = mm_dfs
|
67 |
+
github_data["date"] = mm_date
|
68 |
|
69 |
return github_data
|
70 |
|
|
|
122 |
# Filter dataframe based on queries in 'Model' column
|
123 |
filtered_df = df[df['Model'].str.lower().str.contains('|'.join(queries))]
|
124 |
|
125 |
+
return filtered_df
|
src/plot_utils.py
CHANGED
@@ -127,27 +127,22 @@ def split_models(model_list: list):
|
|
127 |
"""
|
128 |
Split the models into open source and commercial
|
129 |
"""
|
130 |
-
|
131 |
open_models = []
|
132 |
commercial_models = []
|
133 |
-
|
134 |
-
|
135 |
# Load model registry data from main repo
|
136 |
model_registry_url = "https://raw.githubusercontent.com/clp-research/clembench/main/backends/model_registry.json"
|
137 |
response = requests.get(model_registry_url)
|
138 |
|
139 |
if response.status_code == 200:
|
140 |
json_data = json.loads(response.text)
|
141 |
-
# Classify as Open or Commercial based on the defined backend in the model registry
|
142 |
-
backend_mapping = {}
|
143 |
|
144 |
for model_name in model_list:
|
145 |
-
model_prefix = model_name.split('-')[0] # Get the prefix part of the model name
|
146 |
for entry in json_data:
|
147 |
-
if entry["model_name"]
|
148 |
-
|
149 |
-
|
150 |
-
if
|
151 |
open_models.append(model_name)
|
152 |
else:
|
153 |
commercial_models.append(model_name)
|
@@ -178,7 +173,7 @@ def update_open_models(leaderboard: str = TEXT_NAME):
|
|
178 |
Updated checkbox group for Open Models, based on the leaderboard selected
|
179 |
"""
|
180 |
github_data = get_github_data()
|
181 |
-
leaderboard_data = github_data["
|
182 |
models = leaderboard_data.iloc[:, 0].unique().tolist()
|
183 |
open_models, commercial_models = split_models(models)
|
184 |
return gr.CheckboxGroup(
|
@@ -198,7 +193,7 @@ def update_closed_models(leaderboard: str = TEXT_NAME):
|
|
198 |
Updated checkbox group for Closed Models, based on the leaderboard selected
|
199 |
"""
|
200 |
github_data = get_github_data()
|
201 |
-
leaderboard_data = github_data["
|
202 |
models = leaderboard_data.iloc[:, 0].unique().tolist()
|
203 |
open_models, commercial_models = split_models(models)
|
204 |
return gr.CheckboxGroup(
|
@@ -217,7 +212,7 @@ def get_plot_df(leaderboard: str = TEXT_NAME) -> pd.DataFrame:
|
|
217 |
DataFrame with model data.
|
218 |
"""
|
219 |
github_data = get_github_data()
|
220 |
-
return github_data["
|
221 |
|
222 |
|
223 |
"""
|
|
|
127 |
"""
|
128 |
Split the models into open source and commercial
|
129 |
"""
|
|
|
130 |
open_models = []
|
131 |
commercial_models = []
|
132 |
+
|
|
|
133 |
# Load model registry data from main repo
|
134 |
model_registry_url = "https://raw.githubusercontent.com/clp-research/clembench/main/backends/model_registry.json"
|
135 |
response = requests.get(model_registry_url)
|
136 |
|
137 |
if response.status_code == 200:
|
138 |
json_data = json.loads(response.text)
|
|
|
|
|
139 |
|
140 |
for model_name in model_list:
|
|
|
141 |
for entry in json_data:
|
142 |
+
if entry["model_name"] == model_name:
|
143 |
+
open_model = entry["open_weight"]
|
144 |
+
|
145 |
+
if open_model:
|
146 |
open_models.append(model_name)
|
147 |
else:
|
148 |
commercial_models.append(model_name)
|
|
|
173 |
Updated checkbox group for Open Models, based on the leaderboard selected
|
174 |
"""
|
175 |
github_data = get_github_data()
|
176 |
+
leaderboard_data = github_data["multimodal"][0]
|
177 |
models = leaderboard_data.iloc[:, 0].unique().tolist()
|
178 |
open_models, commercial_models = split_models(models)
|
179 |
return gr.CheckboxGroup(
|
|
|
193 |
Updated checkbox group for Closed Models, based on the leaderboard selected
|
194 |
"""
|
195 |
github_data = get_github_data()
|
196 |
+
leaderboard_data = github_data["multimodal"][0]
|
197 |
models = leaderboard_data.iloc[:, 0].unique().tolist()
|
198 |
open_models, commercial_models = split_models(models)
|
199 |
return gr.CheckboxGroup(
|
|
|
212 |
DataFrame with model data.
|
213 |
"""
|
214 |
github_data = get_github_data()
|
215 |
+
return github_data["multimodal"][0]
|
216 |
|
217 |
|
218 |
"""
|
src/version_utils.py
CHANGED
@@ -31,41 +31,30 @@ def get_versions_data():
|
|
31 |
json_data = response.json()
|
32 |
versions = json_data['versions']
|
33 |
|
34 |
-
# Sort version names - latest first
|
35 |
version_names = sorted(
|
36 |
[ver['version'] for ver in versions],
|
37 |
-
key=lambda v:
|
38 |
reverse=True
|
39 |
-
)
|
40 |
-
print(f"Found {len(version_names)} versions from get_versions_data(): {version_names}.")
|
41 |
|
42 |
# Get Last updated date of the latest version
|
43 |
latest_version = version_names[0]
|
44 |
latest_date = next(
|
45 |
ver['date'] for ver in versions if ver['version'] == latest_version
|
46 |
)
|
47 |
-
formatted_date = datetime.strptime(latest_date, "%Y
|
48 |
|
49 |
# Get Versions data
|
50 |
versions_data = {"latest": latest_version, "date": formatted_date}
|
51 |
|
52 |
-
# Collect Dataframes
|
53 |
-
dfs = []
|
54 |
|
55 |
for version in version_names:
|
56 |
-
|
57 |
-
|
58 |
-
quant_url = f"{base_repo}{version}_quantized/results.csv"
|
59 |
-
|
60 |
-
# Text Data
|
61 |
-
response = requests.get(text_url)
|
62 |
-
if response.status_code == 200:
|
63 |
-
df = pd.read_csv(StringIO(response.text))
|
64 |
-
df = process_df(df)
|
65 |
-
df = df.sort_values(by=df.columns[1], ascending=False) # Sort by clemscore column
|
66 |
-
versions_data[version] = df
|
67 |
else:
|
68 |
-
|
|
|
|
|
69 |
|
70 |
# Multimodal Data
|
71 |
mm_response = requests.get(mm_url)
|
@@ -73,20 +62,10 @@ def get_versions_data():
|
|
73 |
mm_df = pd.read_csv(StringIO(mm_response.text))
|
74 |
mm_df = process_df(mm_df)
|
75 |
mm_df = mm_df.sort_values(by=mm_df.columns[1], ascending=False) # Sort by clemscore column
|
76 |
-
versions_data[version+
|
77 |
else:
|
78 |
print(f"Failed to read multimodal leaderboard CSV file for version: {version}: Status Code: {mm_response.status_code}. Please ignore this message if multimodal results are not available for this version")
|
79 |
|
80 |
-
# Multimodal Data
|
81 |
-
q_response = requests.get(quant_url)
|
82 |
-
if q_response.status_code == 200:
|
83 |
-
q_df = pd.read_csv(StringIO(q_response.text))
|
84 |
-
q_df = process_df(q_df)
|
85 |
-
q_df = q_df.sort_values(by=q_df.columns[1], ascending=False) # Sort by clemscore column
|
86 |
-
versions_data[version + "_quantized"] = q_df
|
87 |
-
else:
|
88 |
-
print(f"Failed to read quantized leaderboard CSV file for version: {version}: Status Code: {mm_response.status_code}. Please ignore this message if quantized results are not available for this version")
|
89 |
-
|
90 |
return versions_data
|
91 |
|
92 |
|
|
|
31 |
json_data = response.json()
|
32 |
versions = json_data['versions']
|
33 |
|
|
|
34 |
version_names = sorted(
|
35 |
[ver['version'] for ver in versions],
|
36 |
+
key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))), # {{ edit_1 }}: Corrected slicing to handle 'v' prefix
|
37 |
reverse=True
|
38 |
+
)
|
|
|
39 |
|
40 |
# Get Last updated date of the latest version
|
41 |
latest_version = version_names[0]
|
42 |
latest_date = next(
|
43 |
ver['date'] for ver in versions if ver['version'] == latest_version
|
44 |
)
|
45 |
+
formatted_date = datetime.strptime(latest_date, "%Y-%m-%d").strftime("%d %b %Y") # {{ edit_1 }}: Updated date format
|
46 |
|
47 |
# Get Versions data
|
48 |
versions_data = {"latest": latest_version, "date": formatted_date}
|
49 |
|
|
|
|
|
50 |
|
51 |
for version in version_names:
|
52 |
+
if version.endswith("multimodal"):
|
53 |
+
version_suffix = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
else:
|
55 |
+
version_suffix = "_multimodal"
|
56 |
+
|
57 |
+
mm_url = f"{base_repo}{version}{version_suffix}/results.csv"
|
58 |
|
59 |
# Multimodal Data
|
60 |
mm_response = requests.get(mm_url)
|
|
|
62 |
mm_df = pd.read_csv(StringIO(mm_response.text))
|
63 |
mm_df = process_df(mm_df)
|
64 |
mm_df = mm_df.sort_values(by=mm_df.columns[1], ascending=False) # Sort by clemscore column
|
65 |
+
versions_data[version+version_suffix] = mm_df
|
66 |
else:
|
67 |
print(f"Failed to read multimodal leaderboard CSV file for version: {version}: Status Code: {mm_response.status_code}. Please ignore this message if multimodal results are not available for this version")
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
return versions_data
|
70 |
|
71 |
|