sherzod-hakimov committed
Commit 3d9076c · verified · 1 Parent(s): 3bf3a79

Upload 4 files

src/leaderboard_utils.py CHANGED
@@ -5,11 +5,12 @@ import json
 from io import StringIO
 from datetime import datetime
 
-from src.assets.text_content import REPO
+from src.assets.text_content import REPO, BENCHMARK_FILE
 
 def get_github_data():
     """
-    Read and process data from CSV files hosted on GitHub. - https://github.com/clembench/clembench-runs
+    Read and process data from CSV files hosted on GitHub. - https://github.com/clembench/clembench-runs (REPO)
+    Set the path in src/assets/text_content/REPO
 
     Returns:
         github_data (dict): Dictionary containing:
@@ -17,54 +18,60 @@ def get_github_data():
         - "multimodal": List of DataFrames for each version's multimodal leaderboard data.
         - "date": Formatted date of the latest version in "DD Month YYYY" format.
     """
-    base_repo = REPO
-    json_url = base_repo + "benchmark_runs.json"
+    json_url = REPO + BENCHMARK_FILE
     response = requests.get(json_url)
 
     # Check if the JSON file request was successful
     if response.status_code != 200:
-        print(f"Failed to read JSON file: Status Code: {response.status_code}")
+        print(f"Failed to read JSON file - {BENCHMARK_FILE} in repo {REPO}: Status Code: {response.status_code}")
         return None, None, None, None
 
     json_data = response.json()
    versions = json_data['versions']
 
+    # Sort the versions in benchmark by latest first
     version_names = sorted(
         [ver['version'] for ver in versions],
-        key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))),  # {{ edit_1 }}: Corrected slicing to handle 'v' prefix
+        key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))),
         reverse=True
     )
 
-    # Get Last updated date of the latest version
-    latest_version = version_names[0]
-    latest_date = next(
-        ver['date'] for ver in versions if ver['version'] == latest_version
-    )
-    formatted_date = datetime.strptime(latest_date, "%Y-%m-%d").strftime("%d %b %Y")  # {{ edit_1 }}: Updated date format
-
-    # Get Leaderboard data - for text-only + multimodal
-    github_data = {}
-
-    mm_dfs = []
-    mm_date = ""
-    mm_flag = True
+    # Collect Dataframes - Text and Multimodal Only - Ignoring _quantized, _backends, _ascii
+    text_data = {
+        'version_data': [],
+        'dataframes': []
+    }
+    multimodal_data = {
+        'version_data': [],
+        'dataframes': []
+    }
+
     for version in version_names:
-        # Check if version ends with 'multimodal' before constructing the URL
-        mm_suffix = "_multimodal" if not version.endswith('multimodal') else ""
-        mm_url = f"{base_repo}{version}{mm_suffix}/results.csv"  # {{ edit_1 }}: Conditional suffix for multimodal
-        mm_response = requests.get(mm_url)
-        if mm_response.status_code == 200:
-            df = pd.read_csv(StringIO(mm_response.text))
+        results_url = f"{REPO}{version}/results.csv"
+        csv_response = requests.get(results_url)
+        if csv_response.status_code == 200:
+            df = pd.read_csv(StringIO(csv_response.text))
             df = process_df(df)
-            df = df.sort_values(by=df.columns[1], ascending=False)  # Sort by clemscore column
-            mm_dfs.append(df)
-            if mm_flag:
-                mm_date = next(ver['date'] for ver in versions if ver['version'] == version)
-                mm_date = datetime.strptime(mm_date, "%Y-%m-%d").strftime("%d %b %Y")
-                mm_flag = False
-
-    github_data["multimodal"] = mm_dfs
-    github_data["date"] = mm_date
+            df = df.sort_values(by=df.columns[1], ascending=False)  # Sort by Clemscore
+
+            version_data = {
+                'name': version,
+                'last_updated': [datetime.strptime(v['last_updated'], '%Y-%m-%d').strftime("%d %b %Y") for v in versions if v['version'] == version],
+                'release_date': [datetime.strptime(v['release_date'], '%Y-%m-%d').strftime("%d %b %Y") for v in versions if v['version'] == version]
+            }
+
+            if 'multimodal' in version:
+                multimodal_data['dataframes'].append(df)
+                multimodal_data['version_data'].append(version_data)
+            else:
+                text_data['dataframes'].append(df)
+                text_data['version_data'].append(version_data)
+
+
+    github_data = {
+        'text': text_data,
+        'multimodal': multimodal_data
+    }
 
     return github_data
 
@@ -122,4 +129,9 @@ def query_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
     # Filter dataframe based on queries in 'Model' column
     filtered_df = df[df['Model'].str.lower().str.contains('|'.join(queries))]
 
-    return filtered_df
+    return filtered_df
+
+if __name__=='__main__':
+    data = get_github_data()
+    print(data['text']['version_data'])
+    print(data['multimodal']['version_data'])
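
Note on the new return shape: get_github_data() no longer returns a flat {"multimodal": [...], "date": ...} dictionary; leaderboard data is now nested under 'text' and 'multimodal', each holding parallel 'dataframes' and 'version_data' lists. A minimal consumption sketch (the key names follow the diff above; everything else here is illustrative only):

    from src.leaderboard_utils import get_github_data

    github_data = get_github_data()
    latest_text_df = github_data['text']['dataframes'][0]       # newest text-only leaderboard
    latest_text_meta = github_data['text']['version_data'][0]   # {'name': ..., 'last_updated': [...], 'release_date': [...]}
    print(latest_text_meta['name'])
    print(latest_text_df.head())
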
src/plot_utils.py CHANGED
@@ -4,7 +4,7 @@ import requests
 import json
 import gradio as gr
 
-from src.assets.text_content import SHORT_NAMES, TEXT_NAME, MULTIMODAL_NAME
+from src.assets.text_content import SHORT_NAMES, TEXT_NAME, MULTIMODAL_NAME, REGISTRY_URL
 from src.leaderboard_utils import get_github_data
 
 
@@ -131,8 +131,7 @@ def split_models(model_list: list):
     commercial_models = []
 
     # Load model registry data from main repo
-    model_registry_url = "https://raw.githubusercontent.com/clp-research/clembench/main/backends/model_registry.json"
-    response = requests.get(model_registry_url)
+    response = requests.get(REGISTRY_URL)
 
     if response.status_code == 200:
         json_data = json.loads(response.text)
@@ -163,7 +162,7 @@
 """
 Update Functions, for when the leaderboard selection changes
 """
-def update_open_models(leaderboard: str = TEXT_NAME):
+def update_open_models():
     """
     Change the checkbox group of Open Models based on the leaderboard selected
 
@@ -173,9 +172,9 @@ def update_open_models(leaderboard: str = TEXT_NAME):
         Updated checkbox group for Open Models, based on the leaderboard selected
     """
     github_data = get_github_data()
-    leaderboard_data = github_data["multimodal"][0]
+    leaderboard_data = github_data["multimodal"]['dataframes'][0]
     models = leaderboard_data.iloc[:, 0].unique().tolist()
-    open_models, commercial_models = split_models(models)
+    open_models, _ = split_models(models)
     return gr.CheckboxGroup(
         open_models,
         value=[],
@@ -183,7 +182,7 @@ def update_open_models(leaderboard: str = TEXT_NAME):
         interactive=True,
     )
 
-def update_closed_models(leaderboard: str = TEXT_NAME):
+def update_closed_models():
     """
     Change the checkbox group of Closed Models based on the leaderboard selected
 
@@ -193,9 +192,9 @@ def update_closed_models(leaderboard: str = TEXT_NAME):
         Updated checkbox group for Closed Models, based on the leaderboard selected
     """
     github_data = get_github_data()
-    leaderboard_data = github_data["multimodal"][0]
+    leaderboard_data = github_data["multimodal"]['dataframes'][0]
     models = leaderboard_data.iloc[:, 0].unique().tolist()
-    open_models, commercial_models = split_models(models)
+    _, commercial_models = split_models(models)
     return gr.CheckboxGroup(
         commercial_models,
         value=[],
@@ -203,7 +202,7 @@ def update_closed_models(leaderboard: str = TEXT_NAME):
         interactive=True,
     )
 
-def get_plot_df(leaderboard: str = TEXT_NAME) -> pd.DataFrame:
+def get_plot_df() -> pd.DataFrame:
     """
     Get the DataFrame for plotting based on the selected leaderboard.
     Args:
@@ -212,7 +211,7 @@ def get_plot_df(leaderboard: str = TEXT_NAME) -> pd.DataFrame:
         DataFrame with model data.
     """
     github_data = get_github_data()
-    return github_data["multimodal"][0]
+    return github_data["multimodal"]['dataframes'][0]
 
 
 """
src/trend_utils.py ADDED
@@ -0,0 +1,402 @@
+## Fetch Model Registry and clemscores
+import requests
+import pandas as pd
+from datetime import datetime
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+import numpy as np
+
+from src.assets.text_content import REGISTRY_URL, REPO, BENCHMARK_FILE
+from src.leaderboard_utils import get_github_data
+
+# Cut-off date from where to start the trendgraph
+START_DATE = '2023-06-01'
+
+def get_param_size(params: str) -> float:
+    """Convert parameter size from string to float.
+
+    Args:
+        params (str): The parameter size as a string (e.g., '1000B', '1T').
+
+    Returns:
+        float: The size of parameters in float.
+    """
+    if not params:
+        param_size = 0
+    else:
+        if params[-1] == "B":
+            param_size = params[:-1]
+            param_size = float(param_size)
+        elif params[-1] == "T":
+            param_size = params[:-1]
+            param_size = float(param_size)
+            param_size *= 1000
+        else:
+            print("Not a valid parameter size")
+
+    return param_size
+
+def date_difference(date_str1: str, date_str2: str) -> int:
+    """Calculate the difference in days between two dates.
+
+    Args:
+        date_str1 (str): The first date as a string in 'YYYY-MM-DD' format.
+        date_str2 (str): The second date as a string in 'YYYY-MM-DD' format.
+
+    Returns:
+        int: The difference in days between the two dates.
+    """
+    date_format = "%Y-%m-%d"
+    date1 = datetime.strptime(date_str1, date_format)
+    date2 = datetime.strptime(date_str2, date_format)
+    return (date1 - date2).days
+
+
+def populate_list(df: pd.DataFrame, abs_diff: float) -> list:
+    """Create a list of models based on clemscore differences.
+
+    Args:
+        df (pd.DataFrame): DataFrame containing model data.
+        abs_diff (float): The absolute difference threshold for clemscore.
+
+    Returns:
+        list: A list of model names that meet the criteria.
+    """
+    l = [df.iloc[0]['model']]
+    prev_clemscore = df.iloc[0]['clemscore']
+    prev_date = df.iloc[0]['release_date']
+
+    for i in range(1, len(df)):
+        curr_clemscore = df.iloc[i]['clemscore']
+        curr_date = df.iloc[i]['release_date']
+        date_diff = date_difference(curr_date, prev_date)
+
+        if curr_clemscore - prev_clemscore >= abs_diff:
+            if date_diff == 0:
+                l[-1] = df.iloc[i]['model']
+            else:
+                l.append(df.iloc[i]['model'])
+
+        prev_clemscore = curr_clemscore
+        prev_date = curr_date
+
+    # # Add the last model if the difference between the last and previous date is greater than 15 days
+    # last_date = df.iloc[-1]['release_date']
+    # if date_difference(last_date, prev_date) > 15:
+    #     l.append(df.iloc[-1]['model'])
+
+    return l
+
+
+def get_models_to_display(result_df: pd.DataFrame, open_dip: float = 0, comm_dip: float = 0) -> tuple:
+    """Retrieve models to display based on clemscore differences.
+
+    Args:
+        result_df (pd.DataFrame): DataFrame containing model data.
+        open_dip (float, optional): Threshold for open models. Defaults to 0.
+        comm_dip (float, optional): Threshold for commercial models. Defaults to 0.
+
+    Returns:
+        tuple: Two lists of model names (open and commercial).
+    """
+    open_model_df = result_df[result_df['open_weight']==True]
+    comm_model_df = result_df[result_df['open_weight']==False]
+
+    open_model_df = open_model_df.sort_values(by='release_date', ascending=True)
+    comm_model_df = comm_model_df.sort_values(by='release_date', ascending=True)
+    open_models = populate_list(open_model_df, open_dip)
+    comm_models = populate_list(comm_model_df, comm_dip)
+    return open_models, comm_models
+
+
+def get_trend_data(text_dfs: list, model_registry_data: list) -> pd.DataFrame:
+    """Process text data frames to extract model information.
+
+    Args:
+        text_dfs (list): List of DataFrames containing model information.
+        model_registry_data (list): List of dictionaries containing model registry data.
+
+    Returns:
+        pd.DataFrame: DataFrame containing processed model data.
+    """
+    visited = set()  # Track models that have been processed
+    result_df = pd.DataFrame(columns=['model', 'clemscore', 'open_weight', 'release_date', 'parameters', 'est_flag'])
+
+    for df in text_dfs:
+        for i in range(len(df)):
+            model_name = df['Model'].iloc[i]
+            if model_name not in visited:
+                visited.add(model_name)
+                for dict_obj in model_registry_data:
+                    if dict_obj["model_name"] == model_name:
+                        if dict_obj["parameters"] == "" :
+                            params = "1000B"
+                            est_flag = True
+                        else:
+                            params = dict_obj['parameters']
+                            est_flag = False
+
+                        param_size = get_param_size(params)
+                        new_data = {'model': model_name, 'clemscore': df['Clemscore'].iloc[i], 'open_weight':dict_obj['open_weight'],
+                                    'release_date': dict_obj['release_date'], 'parameters': param_size, 'est_flag': est_flag}
+                        result_df.loc[len(result_df)] = new_data
+                        break
+    return result_df  # Return the compiled DataFrame
+
+
+def get_plot(df: pd.DataFrame, start_date: str = '2023-06-01', end_date: str = '2024-12-30',
+             benchmark_ticks: dict = {}, benchmark_update = {}, **plot_kwargs) -> go.Figure:
+    """Generate a scatter plot for the given DataFrame.
+
+    Args:
+        df (pd.DataFrame): DataFrame containing model data.
+        start_date (str, optional): Start date for filtering. Defaults to '2023-06-01'.
+        end_date (str, optional): End date for filtering. Defaults to '2024-12-30'.
+        benchmark_ticks (dict, optional): Custom benchmark ticks for the version dates. Defaults to {}.
+        benchmark_update (dict, optional): Custom benchmark metadata containing last_updated date for the versions. Defaults to {}.
+
+    Keyword Args:
+        open_dip (float, optional): Threshold for open models' clemscore differences. Max dip in clemscore allowed to be considered in trend.
+        comm_dip (float, optional): Threshold for commercial models' clemscore differences. Max dip in clemscore allowed to be considered in trend.
+        height (int, optional): Height of the plot in pixels. Adjusted for mobile or desktop views.
+        mobile_view (bool, optional): Flag to indicate if the plot should be optimized for mobile display. Defaults to False.
+
+    Returns:
+        go.Figure: The generated plot.
+    """
+
+    open_dip = plot_kwargs['open_dip']
+    comm_dip = plot_kwargs['comm_dip']
+    height = plot_kwargs['height']
+    width = plot_kwargs['width']
+
+    mobile_view = True if plot_kwargs['mobile_view'] else False
+
+    max_clemscore = df['clemscore'].max()
+    # Convert 'release_date' to datetime
+    df['Release date'] = pd.to_datetime(df['release_date'], format='ISO8601')
+    # Filter out data before April 2023/START_DATE
+    df = df[df['Release date'] >= pd.to_datetime(start_date)]
+    open_model_list, comm_model_list = get_models_to_display(df, open_dip, comm_dip)
+    models_to_display = open_model_list + comm_model_list
+    print(f"open_model_list: {open_model_list}, comm_model_list: {comm_model_list}")
+
+    # Create a column to indicate if the model should be labeled
+    df['label_model'] = df['model'].apply(lambda x: x if x in models_to_display else "")
+
+    # If mobile_view, then show only the models in models_to_display i.e. on the trend line #minimalistic
+    if mobile_view:
+        df = df[df['model'].isin(models_to_display)]
+
+    # Add an identifier column to each DataFrame
+    df['Model Type'] = df['open_weight'].map({True: 'Open-Weight', False: 'Commercial'})
+
+    marker_size = df['parameters'].apply(lambda x: np.sqrt(x) if x > 0 else np.sqrt(400)).astype(float)  # Arbitrary sqrt value to scale marker size based on parameter size
+
+    open_color = 'red'
+    comm_color = 'blue'
+
+    # Create the scatter plot
+    fig = px.scatter(df,
+                     x="Release date",
+                     y="clemscore",
+                     color="Model Type",  # Differentiates the datasets by color
+                     hover_name="model",
+                     size=marker_size,
+                     size_max=40,  # Max size of the circles
+                     template="plotly_white",
+                     hover_data={  # Customize hover information
+                         "Release date": True,  # Show the release date
+                         "clemscore": True,  # Show the clemscore
+                         "Model Type": True  # Show the model type
+                     },
+                     custom_data=["model", "Release date", "clemscore"]  # Specify custom data columns for hover
+                     )
+
+    fig.update_traces(
+        hovertemplate='Model Name: %{customdata[0]}<br>Release date: %{customdata[1]}<br>Clemscore: %{customdata[2]}<br>'
+    )
+
+    # Sort dataframes for line plotting
+    df_open = df[df['model'].isin(open_model_list)].sort_values(by='Release date')
+    df_commercial = df[df['model'].isin(comm_model_list)].sort_values(by='Release date')
+
+    ## Custom tics for x axis
+    # Define the start and end dates
+    start_date = pd.to_datetime(start_date)
+    end_date = pd.to_datetime(end_date)
+    # Generate ticks every two months
+    date_range = pd.date_range(start=start_date, end=end_date, freq='2MS')  # '2MS' stands for 2 Months Start frequency
+    # Create labels for these ticks
+    custom_ticks = {date: date.strftime('%b %Y') for date in date_range}
+
+    ## Benchmark Version ticks
+    benchmark_tickvals = list(pd.to_datetime(list(benchmark_ticks.keys())))
+    custom_ticks = {k:v for k,v in custom_ticks.items() if k not in benchmark_tickvals}
+    custom_tickvals = list(custom_ticks.keys())
+
+
+    for date, version in benchmark_ticks.items():
+        # Find the corresponding update date from benchmark_update based on the version name
+        update_date = next((update_date for update_date, ver in benchmark_update.items() if version in ver), None)
+
+        if update_date:
+            # Add vertical black dotted line for each benchmark_tick date
+            fig.add_shape(
+                go.layout.Shape(
+                    type='line',
+                    x0=date,
+                    x1=date,
+                    y0=0,
+                    y1=1,
+                    yref='paper',
+                    line=dict(color='#A9A9A9', dash='dash'),  # Black dotted line
+                )
+            )
+
+            # Add hover information across the full y-axis range
+            fig.add_trace(
+                go.Scatter(
+                    x=[date]*100,
+                    y=list(range(0,100)),  # Covers full y-axis range
+                    mode='markers',
+                    line=dict(color='rgba(255,255,255,0)', width=0),  # Fully transparent line
+                    hovertext=[
+                        f"Version: {version} released on {date.strftime('%d %b %Y')}, last updated on: {update_date.strftime('%d %b %Y')}"
+                        for _ in range(100)
+                    ],  # Unique hovertext for all points
+                    hoverinfo="text",
+                    hoveron='points',
+                    showlegend=False
+                )
+            )
+
+
+    if mobile_view:
+        # Remove custom_tickvals within -1 month to +1 month of benchmark_tickvals for better visibility
+        one_month = pd.DateOffset(months=1)
+        filtered_custom_tickvals = [
+            date for date in custom_tickvals
+            if not any((benchmark_date - one_month <= date <= benchmark_date + one_month) for benchmark_date in benchmark_tickvals)
+        ]
+        # Alternate <br> for benchmark ticks based on date difference (Eg. v1.6, v1.6.5 too close to each other for MM benchmark)
+        benchmark_tick_texts = []
+        for i in range(len(benchmark_tickvals)):
+            if i == 0:
+                benchmark_tick_texts.append(f"<br><br><b>{benchmark_ticks[benchmark_tickvals[i]]}</b>")
+            else:
+                date_diff = (benchmark_tickvals[i] - benchmark_tickvals[i - 1]).days
+                if date_diff <= 75:
+                    benchmark_tick_texts.append(f"<br><br><br><b>{benchmark_ticks[benchmark_tickvals[i]]}</b>")
+                else:
+                    benchmark_tick_texts.append(f"<br><br><b>{benchmark_ticks[benchmark_tickvals[i]]}</b>")
+        fig.update_xaxes(
+            tickvals=filtered_custom_tickvals + benchmark_tickvals,  # Use filtered_custom_tickvals
+            ticktext=[f"{date.strftime('%b')}<br>{date.strftime('%y')}" for date in filtered_custom_tickvals] +
+                     benchmark_tick_texts,  # Use the new benchmark tick texts
+            tickangle=0,
+            tickfont=dict(size=10)
+        )
+        fig.update_yaxes(range=[0, 110])  # Set y-axis range to 110 for better visibility of legend and avoiding overlap with interactivity block of plotly on top-right
+        display_mode = 'lines+markers'
+    else:
+        fig.update_xaxes(
+            tickvals=custom_tickvals + benchmark_tickvals,  # Use filtered_custom_tickvals
+            ticktext=[f"{date.strftime('%b')} {date.strftime('%Y')}" for date in custom_tickvals] +
+                     [f"<br><span style='font-size:12px;'><b>{benchmark_ticks[date]}</b></span>" for date in benchmark_tickvals],  # Added <br> for vertical alignment
+            tickangle=0,
+            tickfont=dict(size=10)
+        )
+        fig.update_yaxes(range=[0, max_clemscore+10])
+        display_mode = 'lines+markers+text'
+
+
+    # Add lines connecting the points for open models
+    fig.add_trace(go.Scatter(x=df_open['Release date'], y=df_open['clemscore'],
+                             mode=display_mode,  # Include 'text' in the mode
+                             name='Open Models Trendline',
+                             text=df_open['label_model'],  # Use label_model for text labels
+                             textposition='top center',  # Position of the text labels
+                             line=dict(color=open_color), showlegend=False))
+
+    # Add lines connecting the points for commercial models
+    fig.add_trace(go.Scatter(x=df_commercial['Release date'], y=df_commercial['clemscore'],
+                             mode=display_mode,  # Include 'text' in the mode
+                             name='Commercial Models Trendline',
+                             text=df_commercial['label_model'],  # Use label_model for text labels
+                             textposition='top center',  # Position of the text labels
+                             line=dict(color=comm_color), showlegend=False))
+
+
+    # Update layout to ensure text labels are visible
+    fig.update_traces(textposition='top center')
+
+    # Update the Legend Position and plot dimensions
+    fig.update_layout(height=height,
+                      legend=dict(
+                          yanchor="top",
+                          y=0.99,
+                          xanchor="left",
+                          x=0.01
+                      )
+                      )
+
+    if width:
+        print("Custom Setting Width :")
+        fig.update_layout(width=width)
+
+    return fig
+
+def get_final_trend_plot(mobile_view: bool = False, custom_width: int = 0) -> go.Figure:
+    """Fetch and generate the final trend plot for all models.
+
+    Args:
+        custom_width: The custom width to use for loading the graph first time.
+        mobile_view (bool, optional): Flag to indicate mobile view. Defaults to False.
+
+    Returns:
+        go.Figure: The generated trend plot for selected benchmark.
+    """
+    # Fetch Model Registry
+    response = requests.get(REGISTRY_URL)
+    model_registry_data = response.json()
+    # Custom tick labels
+    json_url = REPO + BENCHMARK_FILE
+    response = requests.get(json_url)
+
+    # Check if the JSON file request was successful
+    if response.status_code != 200:
+        print(f"Failed to read JSON file: Status Code: {response.status_code}")
+
+    json_data = response.json()
+    versions = json_data['versions']
+
+    if mobile_view:
+        height = 450
+        width = 375
+    else:
+        height = 1000
+        width = None
+
+    if custom_width:
+        width = custom_width
+
+    plot_kwargs = {'height': height, 'width': width, 'open_dip': 0, 'comm_dip': 0,
+                   'mobile_view': mobile_view}
+
+    benchmark_ticks = {}
+    benchmark_update = {}
+    mm_dfs = get_github_data()['multimodal']['dataframes']
+    result_df = get_trend_data(mm_dfs, model_registry_data)
+    df = result_df
+    for ver in versions:
+        if 'multimodal' in ver['version']:
+            temp_ver = ver['version']
+            temp_ver = temp_ver.replace('_multimodal', '')
+            benchmark_ticks[pd.to_datetime(ver['release_date'])] = temp_ver  ## MM benchmark dates considered after v1.6 (incl.)
+            benchmark_update[pd.to_datetime(ver['last_updated'])] = temp_ver
+
+    fig = get_plot(df, start_date=START_DATE, end_date=datetime.now().strftime('%Y-%m-%d'), benchmark_ticks=benchmark_ticks, benchmark_update=benchmark_update, **plot_kwargs)
+
+    return fig
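
get_final_trend_plot() in the new module fetches everything it needs over HTTP (the model registry via REGISTRY_URL, benchmark metadata via REPO + BENCHMARK_FILE, and the multimodal leaderboards via get_github_data()) and returns a Plotly figure. A quick way to eyeball the result outside the Gradio app, assuming those constants are set in src/assets/text_content (the output filename is arbitrary):

    from src.trend_utils import get_final_trend_plot

    fig = get_final_trend_plot(mobile_view=False)
    fig.write_html("trend_plot.html")  # open in a browser to inspect trendlines and version markers
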
src/version_utils.py CHANGED
@@ -1,7 +1,3 @@
-## REQUIRED OUTPUT ###
-# A list of version names -> v1.6, v.6_multimodal, v1.6_quantized, v1.5, v0.9, etc......
-# A corresponding DataFrame?
-
 import requests
 from datetime import datetime
 import pandas as pd
@@ -9,18 +5,18 @@ import json
 from io import StringIO
 
 from src.leaderboard_utils import process_df
-from src.assets.text_content import REPO
+from src.assets.text_content import REPO, BENCHMARK_FILE
 
-def get_versions_data():
+def get_version_data():
     """
-    Read and process data from CSV files of all available versions hosted on GitHub. - https://github.com/clembench/clembench-runs
+    Read and process data from CSV files of all available multimodal versions hosted on GitHub. - https://github.com/clembench/clembench-runs
 
     Returns:
-        versions_data:
+        version_data:
         -
     """
     base_repo = REPO
-    json_url = base_repo + "benchmark_runs.json"
+    json_url = base_repo + BENCHMARK_FILE
     response = requests.get(json_url)
 
     # Check if the JSON file request was successful
@@ -33,42 +29,35 @@ def get_versions_data():
 
     version_names = sorted(
         [ver['version'] for ver in versions],
-        key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))),  # {{ edit_1 }}: Corrected slicing to handle 'v' prefix
+        key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))),
         reverse=True
     )
 
-    # Get Last updated date of the latest version
-    latest_version = version_names[0]
-    latest_date = next(
-        ver['date'] for ver in versions if ver['version'] == latest_version
-    )
-    formatted_date = datetime.strptime(latest_date, "%Y-%m-%d").strftime("%d %b %Y")  # {{ edit_1 }}: Updated date format
-
-    # Get Versions data
-    versions_data = {"latest": latest_version, "date": formatted_date}
-
+    version_data = {
+        'versions': [],
+        'dataframes': []
+    }
 
     for version in version_names:
-        if version.endswith("multimodal"):
-            version_suffix = ""
-        else:
-            version_suffix = "_multimodal"
-
-        mm_url = f"{base_repo}{version}{version_suffix}/results.csv"
-
-        # Multimodal Data
-        mm_response = requests.get(mm_url)
-        if mm_response.status_code == 200:
-            mm_df = pd.read_csv(StringIO(mm_response.text))
-            mm_df = process_df(mm_df)
-            mm_df = mm_df.sort_values(by=mm_df.columns[1], ascending=False)  # Sort by clemscore column
-            versions_data[version+version_suffix] = mm_df
-        else:
-            print(f"Failed to read multimodal leaderboard CSV file for version: {version}: Status Code: {mm_response.status_code}. Please ignore this message if multimodal results are not available for this version")
+        if 'multimodal' in version:  # Only include multimodal versions
+            base_url = f"{base_repo}{version}/results.csv"
+            response = requests.get(base_url)
+            if response.status_code == 200:
+                df = pd.read_csv(StringIO(response.text))
+                df = process_df(df)
+                df = df.sort_values(by=df.columns[1], ascending=False)  # Sort by clemscore column
+                version_data['dataframes'].append(df)
+                metadata = {
+                    'name': version,
+                    'last_updated': [datetime.strptime(v['last_updated'], '%Y-%m-%d').strftime("%d %b %Y") for v in versions if v['version'] == version],
+                    'release_date': [datetime.strptime(v['release_date'], '%Y-%m-%d').strftime("%d %b %Y") for v in versions if v['version'] == version]
+                }
+                version_data['versions'].append(metadata)
 
-    return versions_data
+
+    return version_data
 
 
 if __name__ == "__main__":
-    versions_data = get_versions_data()
-    print(versions_data.keys())
+    version_data = get_version_data()
+    print(version_data['versions'])
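
get_versions_data() is renamed to get_version_data() and now collects only the multimodal versions, returning parallel 'versions' and 'dataframes' lists (entry i of one describes entry i of the other). A small illustrative sketch of pairing the two lists:

    from src.version_utils import get_version_data

    version_data = get_version_data()
    for meta, df in zip(version_data['versions'], version_data['dataframes']):
        print(meta['name'], meta['release_date'], len(df), "models")
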