multimodal-clem-leaderboard

Running

App Files Files Community

sherzod-hakimov committed on Jul 3, 2024

Commit

923aff9

1 Parent(s): 18d5ac3

multimodal leaderboard

Browse files

Files changed (6) hide show

app.py +283 -92
requirements.txt +2 -1
src/assets/text_content.py +11 -2
src/leaderboard_utils.py +106 -115
src/plot_utils.py +194 -61
src/version_utils.py +95 -0

app.py CHANGED Viewed

@@ -1,89 +1,181 @@
 import gradio as gr
-from src.assets.text_content import TITLE, INTRODUCTION_TEXT, CLEMSCORE_TEXT
-from src.leaderboard_utils import filter_search, get_github_data
-from src.plot_utils import split_models, compare_plots
-# For Leaderboards
-dataframe_height = 800 # Height of the table in pixels
-# Get CSV data
-global primary_leaderboard_df, version_dfs, version_names
-primary_leaderboard_df, version_dfs, version_names, date = get_github_data()
-global prev_df
-prev_df = version_dfs[0]
-def select_prev_df(name):
-    ind = version_names.index(name)
-    prev_df = version_dfs[ind]
-    return prev_df
-# For Plots
-global plot_df, OPEN_MODELS, CLOSED_MODELS
-plot_df = primary_leaderboard_df[0]
-MODELS = list(plot_df[list(plot_df.columns)[0]].unique())
-OPEN_MODELS, CLOSED_MODELS = split_models(MODELS)
-# MAIN APPLICATION s
-main_app = gr.Blocks()
-with main_app:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🥇 CLEM Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
             with gr.Row():
                 search_bar = gr.Textbox(
                     placeholder=" 🔍 Search for models - separate multiple queries with `;` and press ENTER...",
                     show_label=False,
                     elem_id="search-bar",
                 )
             leaderboard_table = gr.Dataframe(
-                value=primary_leaderboard_df[0],
-                elem_id="leaderboard-table",
                 interactive=False,
                 visible=True,
                 height=dataframe_height
             )
             gr.HTML(CLEMSCORE_TEXT)
-            gr.HTML(f"Last updated - {date}")
-            # Add a dummy leaderboard to handle search queries from the primary_leaderboard_df and not update primary_leaderboard_df
             dummy_leaderboard_table = gr.Dataframe(
-                value=primary_leaderboard_df[0],
-                elem_id="leaderboard-table",
                 interactive=False,
                 visible=False
             )
             search_bar.submit(
-                filter_search,
                 [dummy_leaderboard_table, search_bar],
                 leaderboard_table,
                 queue=True
             )
-        with gr.TabItem("📈 Plot", id=3):
             with gr.Row():
-                open_models_selection = gr.CheckboxGroup(
-                    OPEN_MODELS,
-                    label="Open-weight Models 🌐",
-                    value=[],
-                    elem_id="value-select",
-                    interactive=True,
                 )
-            with gr.Row():
-                closed_models_selection = gr.CheckboxGroup(
-                    CLOSED_MODELS,
-                    label="Closed-weight Models 💼",
-                    value=[],
-                    elem_id="value-select-2",
-                    interactive=True,
-                )
             with gr.Row():
                 with gr.Column():
                     show_all = gr.CheckboxGroup(
@@ -93,36 +185,41 @@ with main_app:
                         elem_id="value-select-3",
                         interactive=True,
                     )
                 with gr.Column():
                     show_names = gr.CheckboxGroup(
                         ["Show Names"],
-                        label ="Show names of models on the plot 🏷️",
                         value=[],
                         elem_id="value-select-4",
                         interactive=True,
-                    )
                 with gr.Column():
                     show_legend = gr.CheckboxGroup(
                         ["Show Legend"],
-                        label ="Show legend on the plot 💡",
                         value=[],
                         elem_id="value-select-5",
                         interactive=True,
-                    )
                 with gr.Column():
                     mobile_view = gr.CheckboxGroup(
                         ["Mobile View"],
-                        label ="View plot on smaller screens 📱",
                         value=[],
                         elem_id="value-select-6",
                         interactive=True,
-                    )
             with gr.Row():
                 dummy_plot_df = gr.DataFrame(
-                    value=plot_df,
                     visible=False
                 )
@@ -131,90 +228,184 @@ with main_app:
                     # Output block for the plot
                     plot_output = gr.Plot()
             open_models_selection.change(
-                compare_plots,
-                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend, mobile_view],
-                plot_output,
                 queue=True
             )
             closed_models_selection.change(
-                compare_plots,
-                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend, mobile_view],
-                plot_output,
                 queue=True
             )
             show_all.change(
-                compare_plots,
-                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend, mobile_view],
-                plot_output,
                 queue=True
             )
             show_names.change(
-                compare_plots,
-                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend, mobile_view],
-                plot_output,
                 queue=True
             )
             show_legend.change(
-                compare_plots,
-                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend, mobile_view],
-                plot_output,
                 queue=True
             )
             mobile_view.change(
-                compare_plots,
-                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend, mobile_view],
-                plot_output,
                 queue=True
             )
-        with gr.TabItem("🔄 Versions and Details", elem_id="details", id=2):
             with gr.Row():
                 version_select = gr.Dropdown(
-                    version_names, label="Select Version 🕹️", value=version_names[0]
                 )
             with gr.Row():
                 search_bar_prev = gr.Textbox(
                     placeholder=" 🔍 Search for models - separate multiple queries with `;` and press ENTER...",
                     show_label=False,
-                    elem_id="search-bar-2",
                 )
             prev_table = gr.Dataframe(
-                value=prev_df,
-                elem_id="leaderboard-table",
                 interactive=False,
                 visible=True,
                 height=dataframe_height
             )
             dummy_prev_table = gr.Dataframe(
-                value=prev_df,
-                elem_id="leaderboard-table",
                 interactive=False,
                 visible=False
             )
             search_bar_prev.submit(
-                filter_search,
                 [dummy_prev_table, search_bar_prev],
                 prev_table,
                 queue=True
             )
             version_select.change(
-                select_prev_df,
                 [version_select],
                 prev_table,
                 queue=True
             )
-    main_app.load()
-main_app.queue()
-main_app.launch()

 import gradio as gr
+import os
+from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import HfApi
+from datetime import datetime, timedelta
+from src.assets.text_content import TITLE, INTRODUCTION_TEXT, CLEMSCORE_TEXT, MULTIMODAL_NAME, TEXT_NAME, HF_REPO
+from src.leaderboard_utils import query_search, get_github_data
+from src.plot_utils import split_models, plotly_plot, get_plot_df, update_open_models, update_closed_models
+from src.plot_utils import reset_show_all, reset_show_names, reset_show_legend, reset_mobile_view
+from src.version_utils import get_versions_data
+"""
+CONSTANTS
+"""
+# For restarting the gradio application every 12 Hrs (TIME is 43200 seconds)
+TIME = 43200  # in seconds # Reload will not work locally - requires HFToken # The app launches locally as expected - only without the reload utility
+# For Leaderboard table
+dataframe_height = 800  # Height of the table in pixels # Set on average considering all possible devices
+"""
+AUTO RESTART HF SPACE
+"""
+HF_TOKEN = os.environ.get("H4_TOKEN", None)
+api = HfApi()
+def restart_space():
+    api.restart_space(repo_id=HF_REPO, token=HF_TOKEN)
+"""
+GITHUB UTILS
+"""
+github_data = get_github_data()
+text_leaderboard = github_data["text"][0]  # Get the text-only leaderboard for its available latest version
+multimodal_leaderboard = github_data["multimodal"][0]  # Get multimodal leaderboard for its available latest version.
+# Show only First 4 columns for the leaderboards
+text_leaderboard = text_leaderboard.iloc[:, :4]
+print(f"Showing the following columns for the latest leaderboard: {text_leaderboard.columns}")
+multimodal_leaderboard = multimodal_leaderboard.iloc[:, :4]
+print(f"Showing the following columns for the multimodal leaderboard: {multimodal_leaderboard.columns}")
+"""
+VERSIONS UTILS
+"""
+versions_data = get_versions_data()
+latest_version = versions_data['latest']  # Always show latest version in text-only benchmark
+last_updated_date = versions_data['date']
+version_names = list(versions_data.keys())
+version_names = [v for v in version_names if v.startswith("v")]  # Remove "latest" and "date" keys
+global version_df
+version_df = versions_data[latest_version]
+def select_version_df(name):
+    return versions_data[name]
+"""
+MAIN APPLICATION
+"""
+hf_app = gr.Blocks()
+with hf_app:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        """
+        #######################        FIRST TAB - TEXT-LEADERBOARD       #######################
+        """
+        with gr.TabItem(TEXT_NAME, elem_id="llm-benchmark-tab-table", id=0):
             with gr.Row():
                 search_bar = gr.Textbox(
                     placeholder=" 🔍 Search for models - separate multiple queries with `;` and press ENTER...",
                     show_label=False,
                     elem_id="search-bar",
                 )
             leaderboard_table = gr.Dataframe(
+                value=text_leaderboard,
+                elem_id="text-leaderboard-table",
                 interactive=False,
                 visible=True,
                 height=dataframe_height
             )
+            # Show information about the clemscore and last updated date below the table
             gr.HTML(CLEMSCORE_TEXT)
+            gr.HTML(f"Last updated - {github_data['date']}")
+            # Add a dummy leaderboard to handle search queries in leaderboard_table
+            # This will show a temporary leaderboard based on the searched value
             dummy_leaderboard_table = gr.Dataframe(
+                value=text_leaderboard,
+                elem_id="text-leaderboard-table-dummy",
                 interactive=False,
                 visible=False
             )
+            # Action after submitting a query to the search bar
             search_bar.submit(
+                query_search,
                 [dummy_leaderboard_table, search_bar],
                 leaderboard_table,
                 queue=True
             )
+        """
+        #######################       SECOND TAB - MULTIMODAL LEADERBOARD     #######################
+        """
+        with gr.TabItem(MULTIMODAL_NAME, elem_id="mm-llm-benchmark-tab-table", id=1):
             with gr.Row():
+                mm_search_bar = gr.Textbox(
+                    placeholder=" 🔍 Search for models - separate multiple queries with `;` and press ENTER...",
+                    show_label=False,
+                    elem_id="search-bar",
                 )
+            mm_leaderboard_table = gr.Dataframe(
+                value=multimodal_leaderboard,
+                elem_id="mm-leaderboard-table",
+                interactive=False,
+                visible=True,
+                height=dataframe_height
+            )
+            # Show information about the clemscore and last updated date below the table
+            gr.HTML(CLEMSCORE_TEXT)
+            gr.HTML(f"Last updated - {github_data['date']}")
+            # Add a dummy leaderboard to handle search queries in leaderboard_table
+            # This will show a temporary leaderboard based on the searched value
+            mm_dummy_leaderboard_table = gr.Dataframe(
+                value=multimodal_leaderboard,
+                elem_id="mm-leaderboard-table-dummy",
+                interactive=False,
+                visible=False
+            )
+            # Action after submitting a query to the search bar
+            mm_search_bar.submit(
+                query_search,
+                [mm_dummy_leaderboard_table, mm_search_bar],
+                mm_leaderboard_table,
+                queue=True
+            )
+        """
+        #######################       THIRD TAB - PLOTS - %PLAYED V/S QUALITY SCORE     #######################
+        """
+        with gr.TabItem("📈 Plots", elem_id="plots", id=2):
+            """
+            DropDown Select for Text/Multimodal Leaderboard
+            """
+            leaderboard_selection = gr.Dropdown(
+                choices=[TEXT_NAME, MULTIMODAL_NAME],
+                value=TEXT_NAME,
+                label="Select Leaderboard 🎖️🔽",
+                elem_id="value-select-0",
+                interactive=True
+            )
+            """
+            Accordion Groups to select individual models - Hidden by default
+            """
+            with gr.Accordion("Select Open-weight Models 🌐", open=False):
+                open_models_selection = update_open_models()
+                clear_button_1 = gr.ClearButton(open_models_selection)
+            with gr.Accordion("Select Commercial Models 💰", open=False):
+                closed_models_selection = update_closed_models()
+                clear_button_2 = gr.ClearButton(closed_models_selection)
+            """
+            Checkbox group to control the layout of the plot
+            """
             with gr.Row():
                 with gr.Column():
                     show_all = gr.CheckboxGroup(
                         elem_id="value-select-3",
                         interactive=True,
                     )
                 with gr.Column():
                     show_names = gr.CheckboxGroup(
                         ["Show Names"],
+                        label="Show names of models on the plot 🏷️",
                         value=[],
                         elem_id="value-select-4",
                         interactive=True,
+                    )
                 with gr.Column():
                     show_legend = gr.CheckboxGroup(
                         ["Show Legend"],
+                        label="Show legend on the plot 💡",
                         value=[],
                         elem_id="value-select-5",
                         interactive=True,
+                    )
                 with gr.Column():
                     mobile_view = gr.CheckboxGroup(
                         ["Mobile View"],
+                        label="View plot on smaller screens 📱",
                         value=[],
                         elem_id="value-select-6",
                         interactive=True,
+                    )
+            """
+            PLOT BLOCK
+            """
+            # Create a dummy DataFrame as an input to the plotly_plot function.
+            # Uses this data to plot the %played v/s quality score
             with gr.Row():
                 dummy_plot_df = gr.DataFrame(
+                    value=get_plot_df(),
                     visible=False
                 )
                     # Output block for the plot
                     plot_output = gr.Plot()
+            """
+            PLOT CHANGE ACTIONS
+            Toggle 'Select All Models' based on the values in Accordion checkbox groups
+            """
             open_models_selection.change(
+                plotly_plot,
+                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend,
+                 mobile_view],
+                [plot_output],
                 queue=True
             )
             closed_models_selection.change(
+                plotly_plot,
+                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend,
+                 mobile_view],
+                [plot_output],
                 queue=True
             )
             show_all.change(
+                plotly_plot,
+                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend,
+                 mobile_view],
+                [plot_output],
                 queue=True
             )
             show_names.change(
+                plotly_plot,
+                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend,
+                 mobile_view],
+                [plot_output],
                 queue=True
             )
             show_legend.change(
+                plotly_plot,
+                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend,
+                 mobile_view],
+                [plot_output],
                 queue=True
             )
             mobile_view.change(
+                plotly_plot,
+                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend,
+                 mobile_view],
+                [plot_output],
+                queue=True
+            )
+            """
+            LEADERBOARD SELECT CHANGE ACTIONS
+            Update Checkbox Groups and Dummy DF based on the leaderboard selected
+            """
+            leaderboard_selection.change(
+                update_open_models,
+                [leaderboard_selection],
+                [open_models_selection],
+                queue=True
+            )
+            leaderboard_selection.change(
+                update_closed_models,
+                [leaderboard_selection],
+                [closed_models_selection],
+                queue=True
+            )
+            leaderboard_selection.change(
+                get_plot_df,
+                [leaderboard_selection],
+                [dummy_plot_df],
                 queue=True
             )
+            ## Implement Feature - Reset Plot when Leaderboard selection changes
+            leaderboard_selection.change(
+                reset_show_all,
+                outputs=[show_all],
+                queue=True
+            )
+            open_models_selection.change(
+                reset_show_all,
+                outputs=[show_all],
+                queue=True
+            )
+            closed_models_selection.change(
+                reset_show_all,
+                outputs=[show_all],
+                queue=True
+            )
+            leaderboard_selection.change(
+                reset_show_names,
+                outputs=[show_names],
+                queue=True
+            )
+            leaderboard_selection.change(
+                reset_show_legend,
+                outputs=[show_legend],
+                queue=True
+            )
+            leaderboard_selection.change(
+                reset_mobile_view,
+                outputs=[mobile_view],
+                queue=True
+            )
+        """
+        #######################       FOURTH TAB - VERSIONS AND DETAILS     #######################
+        """
+        with gr.TabItem("🔄 Versions and Details", elem_id="versions-details-tab", id=3):
             with gr.Row():
                 version_select = gr.Dropdown(
+                    version_names, label="Select Version 🕹️", value=latest_version
                 )
             with gr.Row():
                 search_bar_prev = gr.Textbox(
                     placeholder=" 🔍 Search for models - separate multiple queries with `;` and press ENTER...",
                     show_label=False,
+                    elem_id="search-bar-3",
                 )
             prev_table = gr.Dataframe(
+                value=version_df,
+                elem_id="version-leaderboard-table",
                 interactive=False,
                 visible=True,
                 height=dataframe_height
             )
             dummy_prev_table = gr.Dataframe(
+                value=version_df,
+                elem_id="version-dummy-leaderboard-table",
                 interactive=False,
                 visible=False
             )
+            gr.HTML(CLEMSCORE_TEXT)
+            gr.HTML(f"Last updated - {last_updated_date}")
             search_bar_prev.submit(
+                query_search,
                 [dummy_prev_table, search_bar_prev],
                 prev_table,
                 queue=True
             )
             version_select.change(
+                select_version_df,
                 [version_select],
                 prev_table,
                 queue=True
             )
+            # Update Dummy Leaderboard, when changing versions
+            version_select.change(
+                select_version_df,
+                [version_select],
+                dummy_prev_table,
+                queue=True
+            )
+    hf_app.load()
+hf_app.queue()
+# Add scheduler to auto-restart the HF space at every TIME interval and update every component each time
+scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, 'interval', seconds=TIME)
+scheduler.start()
+# Log current start time and scheduled restart time
+print(datetime.now())
+print(f"Scheduled restart at {datetime.now() + timedelta(seconds=TIME)}")
+hf_app.launch()

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 gradio==4.36.1
 pandas==2.0.0
-plotly==5.18.0

 gradio==4.36.1
 pandas==2.0.0
+plotly==5.18.0
+apscheduler==3.10.4

src/assets/text_content.py CHANGED Viewed

@@ -1,11 +1,20 @@
 TITLE = """<h1 align="center" id="space-title"> 🏆 CLEM Leaderboard</h1>"""
 INTRODUCTION_TEXT = """
 <h6 align="center">
 The CLEM Leaderboard aims to track, rank and evaluate current cLLMs (chat-optimized Large Language Models) with the suggested pronounciation “clems”.
 The benchmarking approach is described in [Clembench: Using Game Play to Evaluate Chat-Optimized Language Models as Conversational Agents](https://aclanthology.org/2023.emnlp-main.689.pdf).
 Source code for benchmarking "clems" is available here: [Clembench](https://github.com/clembench/clembench)
 All generated files and results from the benchmark runs are available here: [clembench-runs](https://github.com/clembench/clembench-runs) </h6>
@@ -52,6 +61,6 @@ SHORT_NAMES = {
     "vicuna-7b-v1.5": "vic-7b-v1.5",
     "vicuna-13b-v1.5": "vic-13b-v1.5",
     "gpt4all-13b-snoozy": "g4a-13b-s",
-    "zephyr-7b-alpha":"z-7b-a",
-    "zephyr-7b-beta":"z-7b-b"
 }

 TITLE = """<h1 align="center" id="space-title"> 🏆 CLEM Leaderboard</h1>"""
+REPO = "https://raw.githubusercontent.com/clembench/clembench-runs/main/"
+HF_REPO = "colab-potsdam/clem-leaderboard"
+TEXT_NAME = "🥇 CLEM Leaderboard"
+MULTIMODAL_NAME = "🥇 Multimodal CLEM Leaderboard"
 INTRODUCTION_TEXT = """
 <h6 align="center">
 The CLEM Leaderboard aims to track, rank and evaluate current cLLMs (chat-optimized Large Language Models) with the suggested pronounciation “clems”.
 The benchmarking approach is described in [Clembench: Using Game Play to Evaluate Chat-Optimized Language Models as Conversational Agents](https://aclanthology.org/2023.emnlp-main.689.pdf).
+The Multimodal Benchmark is described in [Two Giraffes in a Dirt Field: Using Game Play to Investigate Situation Modelling in Large Multimodal Models](https://arxiv.org/abs/2406.14035)
 Source code for benchmarking "clems" is available here: [Clembench](https://github.com/clembench/clembench)
 All generated files and results from the benchmark runs are available here: [clembench-runs](https://github.com/clembench/clembench-runs) </h6>
     "vicuna-7b-v1.5": "vic-7b-v1.5",
     "vicuna-13b-v1.5": "vic-13b-v1.5",
     "gpt4all-13b-snoozy": "g4a-13b-s",
+    "zephyr-7b-alpha": "z-7b-a",
+    "zephyr-7b-beta": "z-7b-b"
 }

src/leaderboard_utils.py CHANGED Viewed

@@ -1,148 +1,139 @@
 import os
 import pandas as pd
-import requests, json
 from io import StringIO
 from datetime import datetime
 def get_github_data():
     """
-    Get data from csv files on Github
-    Args:
-        None
     Returns:
-        latest_df: singular list containing dataframe of the latest version of the leaderboard with only 4 columns
-        all_dfs: list of dataframes for previous versions + latest version including columns for all games
-        all_vnames: list of the names for the previous versions + latest version (For Details and Versions Tab Dropdown)
     """
-    uname = "clembench"
-    repo = "clembench-runs"
-    json_url = f"https://raw.githubusercontent.com/{uname}/{repo}/main/benchmark_runs.json"
-    resp = requests.get(json_url)
-    if resp.status_code == 200:
-        json_data = json.loads(resp.text)
-        versions = json_data['versions']
-        version_names = []
-        csv_url = f"https://raw.githubusercontent.com/{uname}/{repo}/main/"
-        for ver in versions:
-            version_names.append(ver['version'])
-            csv_path = ver['result_file'].split('/')[1:]
-            csv_path = '/'.join(csv_path)
-        # Sort by latest version
-        float_content = [float(s[1:]) for s in version_names]
-        float_content.sort(reverse=True)
-        version_names = ['v'+str(s) for s in float_content]
-        # Get date of latest version
-        for data in versions:
-            if data['version'] == version_names[0]:
-                date = data['date'] # Should be in YYYY/MM/DD format
-                date_obj = datetime.strptime(date, "%Y/%m/%d")
-                date = date_obj.strftime("%d %b %Y")
-        DFS = []
-        for version in version_names:
-            result_url = csv_url+ version + '/' + csv_path
-            csv_response = requests.get(result_url)
-            if csv_response.status_code == 200:
-                df = pd.read_csv(StringIO(csv_response.text))
                 df = process_df(df)
-                df = df.sort_values(by=list(df.columns)[1], ascending=False) # Sort by clemscore
-                DFS.append(df)
-            else:
-                print(f"Failed to read CSV file for version : {version}. Status Code : {resp.status_code}")
-        # Only keep relevant columns for the main leaderboard
-        latest_df_dummy = DFS[0]
-        all_columns = list(latest_df_dummy.columns)
-        keep_columns = all_columns[0:4]
-        latest_df_dummy = latest_df_dummy.drop(columns=[c for c in all_columns if c not in keep_columns])
-        latest_df = [latest_df_dummy]
-        all_dfs = []
-        all_vnames = []
-        for df, name in zip(DFS, version_names):
-            all_dfs.append(df)
-            all_vnames.append(name)
-        return latest_df, all_dfs, all_vnames, date
-    else:
-        print(f"Failed to read JSON file: Status Code : {resp.status_code}")
 def process_df(df: pd.DataFrame) -> pd.DataFrame:
     """
-    Process dataframe
-    - Remove repition in model names
-    - Convert datatypes to sort by "float" instead of "str" for sorting
     - Update column names
     Args:
         df: Unprocessed Dataframe (after using update_cols)
     Returns:
         df: Processed Dataframe
     """
-    # Change column type to float from str
-    list_column_names = list(df.columns)
-    model_col_name = list_column_names[0]
-    for col in list_column_names:
-        if col != model_col_name:
-            df[col] = df[col].astype(float)
-    # Remove repetition in model names, if any
-    models_list = []
-    for i in range(len(df)):
-        model_name = df.iloc[i][model_col_name]
-        splits = model_name.split('--')
-        splits = [split.replace('-t0.0', '') for split in splits] # Comment to not remove -t0.0
-        if splits[0] == splits[1]:
-            models_list.append(splits[0])
-        else:
-            models_list.append(splits[0] + "--" + splits[1])
-    df[model_col_name] = models_list
     # Update column names
-    update = ['Model', 'Clemscore', '% Played', 'Quality Score']
-    game_metrics = list_column_names[4:]
-    for col in game_metrics:
-        splits = col.split(',')
-        update.append(splits[0].capitalize() + "" + splits[1])
-    map_cols = {}
-    for i in range(len(update)):
-        map_cols[list_column_names[i]] = str(update[i])
-    df = df.rename(columns=map_cols)
     return df
-def filter_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
     """
-    Filter the dataframe based on the search query
     Args:
-        df: Unfiltered dataframe
-        query: a string of queries separated by ";"
-    Return:
-        filtered_df: Dataframe containing searched queries in the 'Model' column
     """
-    queries = query.split(';')
-    list_cols = list(df.columns)
-    df_len = len(df)
-    filtered_models = []
-    models_list = list(df[list_cols[0]])
-    for q in queries:
-        q = q.lower()
-        q = q.strip()
-        for i in range(df_len):
-            model_name = models_list[i]
-            if q in model_name.lower():
-                filtered_models.append(model_name) # Append model names containing query q
-    filtered_df = df[df[list_cols[0]].isin(filtered_models)]
-    if query == "":
         return df
     return filtered_df

 import os
 import pandas as pd
+import requests
+import json
 from io import StringIO
 from datetime import datetime
+from src.assets.text_content import REPO
 def get_github_data():
     """
+    Read and process data from CSV files hosted on GitHub. - https://github.com/clembench/clembench-runs
     Returns:
+        github_data (dict): Dictionary containing:
+            - "text": List of DataFrames for each version's textual leaderboard data.
+            - "multimodal": List of DataFrames for each version's multimodal leaderboard data.
+            - "date": Formatted date of the latest version in "DD Month YYYY" format.
     """
+    base_repo = REPO
+    json_url = base_repo + "benchmark_runs.json"
+    response = requests.get(json_url)
+    # Check if the JSON file request was successful
+    if response.status_code != 200:
+        print(f"Failed to read JSON file: Status Code: {response.status_code}")
+        return None, None, None, None
+    json_data = response.json()
+    versions = json_data['versions']
+    # Sort version names - latest first
+    version_names = sorted(
+        [ver['version'] for ver in versions],
+        key=lambda v: float(v[1:]),
+        reverse=True
+    )
+    print(f"Found {len(version_names)} versions from get_github_data(): {version_names}.")
+    # Get Last updated date of the latest version
+    latest_version = version_names[0]
+    latest_date = next(
+        ver['date'] for ver in versions if ver['version'] == latest_version
+    )
+    formatted_date = datetime.strptime(latest_date, "%Y/%m/%d").strftime("%d %b %Y")
+    # Get Leaderboard data - for text-only + multimodal
+    github_data = {}
+    # Collect Dataframes
+    text_dfs = []
+    mm_dfs = []
+    for version in version_names:
+        # Collect CSV data in descending order of clembench-runs versions
+        # Collect Text-only data
+        text_url = f"{base_repo}{version}/results.csv"
+        csv_response = requests.get(text_url)
+        if csv_response.status_code == 200:
+            df = pd.read_csv(StringIO(csv_response.text))
+            df = process_df(df)
+            df = df.sort_values(by=df.columns[1], ascending=False)  # Sort by clemscore column
+            text_dfs.append(df)
+        else:
+            print(f"Failed to read Text-only leaderboard CSV file for version: {version}. Status Code: {csv_response.status_code}")
+        # Collect Multimodal data
+        if float(version[1:]) >= 1.6:
+            mm_url = f"{base_repo}{version}_multimodal/results.csv"
+            mm_response = requests.get(mm_url)
+            if mm_response.status_code == 200:
+                df = pd.read_csv(StringIO(mm_response.text))
                 df = process_df(df)
+                df = df.sort_values(by=df.columns[1], ascending=False) # Sort by clemscore column
+                mm_dfs.append(df)
+        else:
+            print(f"Failed to read multimodal leaderboard CSV file for version: {version}: Status Code: {csv_response.status_code}. Please ignore this message if multimodal results are not available for this version")
+    github_data["text"] = text_dfs
+    github_data["multimodal"] = mm_dfs
+    github_data["date"] = formatted_date
+    return github_data
 def process_df(df: pd.DataFrame) -> pd.DataFrame:
     """
+    Process dataframe:
+    - Convert datatypes to sort by "float" instead of "str"
+    - Remove repetition in model names
     - Update column names
     Args:
         df: Unprocessed Dataframe (after using update_cols)
     Returns:
         df: Processed Dataframe
     """
+    # Convert column values to float, apart from the model names column
+    for col in df.columns[1:]:
+        df[col] = pd.to_numeric(df[col], errors='coerce')
+    # Remove repetition in model names
+    df[df.columns[0]] = df[df.columns[0]].str.replace('-t0.0', '', regex=True)
+    df[df.columns[0]] = df[df.columns[0]].apply(lambda x: '--'.join(set(x.split('--'))))
     # Update column names
+    custom_column_names = ['Model', 'Clemscore', '% Played', 'Quality Score']
+    for i, col in enumerate(df.columns[4:]):  # Start Capitalizing from the 5th column
+        parts = col.split(',')
+        custom_name = f"{parts[0].strip().capitalize()} {parts[1].strip()}"
+        custom_column_names.append(custom_name)
+    # Rename columns
+    df.columns = custom_column_names
     return df
def query_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """
    Filter the dataframe based on the search query.

    Each query is matched as a literal, case-insensitive substring of the
    'Model' column, so characters such as "(", "+" or "." in a model name are
    searched for as typed rather than interpreted as a regular expression.

    Args:
        df (pd.DataFrame): Unfiltered dataframe.
        query (str): A string of queries separated by ";".
    Returns:
        pd.DataFrame: Filtered dataframe containing searched queries in the 'Model' column.
        The unfiltered dataframe is returned when no usable query is given.
    """
    import re  # Local import: only needed to escape user input for the pattern below

    if not query or not query.strip():  # Reset Dataframe if empty query is passed
        return df

    # Normalize and split queries, dropping empty fragments such as in "a;;b"
    queries = [q.strip().lower() for q in query.split(';') if q.strip()]
    if not queries:  # Query consisted only of separators
        return df

    # Escape every query so the alternation only ever matches literal text
    pattern = '|'.join(re.escape(q) for q in queries)

    # Filter dataframe based on queries in 'Model' column; missing names never match
    mask = df['Model'].str.lower().str.contains(pattern, regex=True, na=False)
    return df[mask]

src/plot_utils.py CHANGED Viewed

@@ -1,22 +1,31 @@
 import pandas as pd
 import plotly.express as px
-from src.assets.text_content import SHORT_NAMES
-def plotly_plot(df:pd.DataFrame, LIST:list, ALL:list, NAMES:list, LEGEND:list, MOBILE:list ):
-    '''
     Takes in a list of models for a plotly plot
     Args:
         df: A dummy dataframe of latest version
-        LIST: List of models to plot
-        ALL: Either [] or ["Show All Models"] - toggle view to plot all models
-        NAMES: Either [] or ["Show Names"] - toggle view to show model names on plot
-        LEGEND: Either [] or ["Show Legend"] - toggle view to show legend on plot
-        MOBILE: Either [] or ["Mobile View"] - toggle view to for smaller screens
     Returns:
-        Fig: plotly figure
-    '''
     # Get list of all models and append short names column to df
     list_columns = list(df.columns)
     ALL_LIST = list(df[list_columns[0]].unique())
@@ -24,25 +33,24 @@ def plotly_plot(df:pd.DataFrame, LIST:list, ALL:list, NAMES:list, LEGEND:list, M
     list_short_names = list(short_names.values())
     df["Short"] = list_short_names
-    if ALL:
         LIST = ALL_LIST
     # Filter dataframe based on the provided list of models
     df = df[df[list_columns[0]].isin(LIST)]
-    if NAMES:
         fig = px.scatter(df, x=list_columns[2], y=list_columns[3], color=list_columns[0], symbol=list_columns[0],
-                 color_discrete_map={"category1": "blue", "category2": "red"},
-                 hover_name=list_columns[0], template="plotly_white", text="Short")
         fig.update_traces(textposition='top center')
     else:
         fig = px.scatter(df, x=list_columns[2], y=list_columns[3], color=list_columns[0], symbol=list_columns[0],
-                    color_discrete_map={"category1": "blue", "category2": "red"},
-                    hover_name=list_columns[0], template="plotly_white")
-    if not LEGEND:
         fig.update_layout(showlegend=False)
     fig.update_layout(
         xaxis_title='% Played',
         yaxis_title='Quality Score',
@@ -53,11 +61,10 @@ def plotly_plot(df:pd.DataFrame, LIST:list, ALL:list, NAMES:list, LEGEND:list, M
     fig.update_xaxes(range=[-5, 105])
     fig.update_yaxes(range=[-5, 105])
-    if MOBILE:
         fig.update_layout(height=300)
-    if MOBILE and LEGEND:
         fig.update_layout(height=450)
         fig.update_layout(legend=dict(
             yanchor="bottom",
@@ -75,28 +82,6 @@ def plotly_plot(df:pd.DataFrame, LIST:list, ALL:list, NAMES:list, LEGEND:list, M
     return fig
-# ['Model', 'Clemscore', 'All(Played)', 'All(Quality Score)']
-def compare_plots(df: pd.DataFrame, LIST1: list, LIST2: list, ALL:list, NAMES:list, LEGEND: list, MOBILE: list):
-    '''
-    Quality Score v/s % Played plot by selecting models
-    Args:
-        df: A dummy dataframe of latest version
-        LIST1: The list of open source models to show in the plot, updated from frontend
-        LIST2: The list of commercial models to show in the plot, updated from frontend
-        ALL: Either [] or ["Show All Models"] - toggle view to plot all models
-        NAMES: Either [] or ["Show Names"] - toggle view to show model names on plot
-        LEGEND: Either [] or ["Show Legend"] - toggle view to show legend on plot
-        MOBILE: Either [] or ["Mobile View"] - toggle view to for smaller screens
-    Returns:
-        fig: The plot
-    '''
-    # Combine lists for Open source and commercial models
-    LIST = LIST1 + LIST2
-    fig = plotly_plot(df, LIST, ALL, NAMES, LEGEND, MOBILE)
-    return fig
 def shorten_model_name(full_name):
     # Split the name into parts
     parts = full_name.split('-')
@@ -111,19 +96,20 @@ def shorten_model_name(full_name):
         short_name = '-'.join(short_name_parts)
         # Remove any leading or trailing hyphens
-        short_name = full_name[0] + '-'+ short_name.strip('-')
     return short_name
 def label_map(model_list: list) -> dict:
-    '''
     Generate a map from long names to short names, to plot them in frontend graph
     Define the short names in src/assets/text_content.py
     Args:
         model_list: A list of long model names
     Returns:
         short_name: A dict from long to short name
-    '''
     short_names = {}
     for model_name in model_list:
         if model_name in SHORT_NAMES:
@@ -135,20 +121,167 @@ def label_map(model_list: list) -> dict:
         short_names[model_name] = short_name
     return short_names
-def split_models(MODEL_LIST: list):
-    '''
     Split the models into open source and commercial
-    '''
     open_models = []
-    comm_models = []
-    for model in MODEL_LIST:
-        if model.startswith(('gpt-', 'claude-', 'command')):
-            comm_models.append(model)
-        else:
-            open_models.append(model)
     open_models.sort(key=lambda o: o.upper())
-    comm_models.sort(key=lambda c: c.upper())
-    return open_models, comm_models

 import pandas as pd
 import plotly.express as px
+import requests
+import json
+import gradio as gr
+from src.assets.text_content import SHORT_NAMES, TEXT_NAME, MULTIMODAL_NAME
+from src.leaderboard_utils import get_github_data
+def plotly_plot(df: pd.DataFrame, list_op: list, list_co: list,
+                show_all: list, show_names: list, show_legend: list,
+                mobile_view: list):
+    """
     Takes in a list of models for a plotly plot
     Args:
         df: A dummy dataframe of latest version
+        list_op: The list of open source models to show in the plot, updated from frontend
+        list_co: The list of commercial models to show in the plot, updated from frontend
+        show_all: Either [] or ["Show All Models"] - toggle view to plot all models
+        show_names: Either [] or ["Show Names"] - toggle view to show model names on plot
+        show_legend: Either [] or ["Show Legend"] - toggle view to show legend on plot
+        mobile_view: Either [] or ["Mobile View"] - toggle view to for smaller screens
     Returns:
+        Fig: plotly figure of % played v/s quality score
+    """
+    LIST = list_op + list_co
     # Get list of all models and append short names column to df
     list_columns = list(df.columns)
     ALL_LIST = list(df[list_columns[0]].unique())
     list_short_names = list(short_names.values())
     df["Short"] = list_short_names
+    if show_all:
         LIST = ALL_LIST
     # Filter dataframe based on the provided list of models
     df = df[df[list_columns[0]].isin(LIST)]
+    if show_names:
         fig = px.scatter(df, x=list_columns[2], y=list_columns[3], color=list_columns[0], symbol=list_columns[0],
+                         color_discrete_map={"category1": "blue", "category2": "red"},
+                         hover_name=list_columns[0], template="plotly_white", text="Short")
         fig.update_traces(textposition='top center')
     else:
         fig = px.scatter(df, x=list_columns[2], y=list_columns[3], color=list_columns[0], symbol=list_columns[0],
+                         color_discrete_map={"category1": "blue", "category2": "red"},
+                         hover_name=list_columns[0], template="plotly_white")
+    if not show_legend:
         fig.update_layout(showlegend=False)
     fig.update_layout(
         xaxis_title='% Played',
         yaxis_title='Quality Score',
     fig.update_xaxes(range=[-5, 105])
     fig.update_yaxes(range=[-5, 105])
+    if mobile_view:
         fig.update_layout(height=300)
+    if mobile_view and show_legend:
         fig.update_layout(height=450)
         fig.update_layout(legend=dict(
             yanchor="bottom",
     return fig
 def shorten_model_name(full_name):
     # Split the name into parts
     parts = full_name.split('-')
         short_name = '-'.join(short_name_parts)
         # Remove any leading or trailing hyphens
+        short_name = full_name[0] + '-' + short_name.strip('-')
     return short_name
 def label_map(model_list: list) -> dict:
+    """
     Generate a map from long names to short names, to plot them in frontend graph
     Define the short names in src/assets/text_content.py
     Args:
         model_list: A list of long model names
     Returns:
         short_name: A dict from long to short name
+    """
     short_names = {}
     for model_name in model_list:
         if model_name in SHORT_NAMES:
         short_names[model_name] = short_name
     return short_names
def split_models(model_list: list):
    """
    Split the models into open source and commercial

    The model registry of the main clembench repository is downloaded and each
    model is classified by the backend of the first registry entry whose
    "model_name" starts with the model's prefix (the text before the first "-").
    Models without any matching registry entry end up in neither list.

    Args:
        model_list: A list of model names
    Returns:
        open_models: Alphabetically sorted (case-insensitive) open models
        commercial_models: Alphabetically sorted (case-insensitive) commercial models
    """
    open_models = []
    commercial_models = []
    open_backends = {"huggingface_local", "huggingface_multimodal", "openai_compatible"}  # Define backends considered as open

    # Load model registry data from main repo
    model_registry_url = "https://raw.githubusercontent.com/clp-research/clembench/main/backends/model_registry.json"
    registry = None
    try:
        # A timeout keeps the caller from hanging forever on a stalled connection
        response = requests.get(model_registry_url, timeout=30)
    except requests.RequestException as err:
        # Treat connection problems like a non-200 answer: report and go on
        print(f"Failed to read JSON file: {err}")
    else:
        if response.status_code == 200:
            registry = json.loads(response.text)
        else:
            print(f"Failed to read JSON file: Status Code : {response.status_code}")

    if registry is not None:
        # Classify as Open or Commercial based on the defined backend in the model registry
        for model_name in model_list:
            model_prefix = model_name.split('-')[0]  # Get the prefix part of the model name
            for entry in registry:
                if entry["model_name"].startswith(model_prefix):
                    if entry["backend"] in open_backends:
                        open_models.append(model_name)
                    else:
                        commercial_models.append(model_name)
                    break  # only the first matching registry entry counts

    # Add missing model from the model_registry.
    # Done before sorting and guarded against duplicates, so the list stays
    # ordered and the model is listed once even if the registry matches it.
    missing_model = "dolphin-2.5-mixtral-8x7b"
    if missing_model in model_list and missing_model not in open_models:
        open_models.append(missing_model)

    open_models.sort(key=lambda o: o.upper())
    commercial_models.sort(key=lambda c: c.upper())
    return open_models, commercial_models
+"""
+Update Functions, for when the leaderboard selection changes
+"""
def update_open_models(leaderboard: str = TEXT_NAME):
    """
    Rebuild the Open Models checkbox group for the selected leaderboard.

    Args:
        leaderboard: Selected leaderboard from the frontend [Default - Text Leaderboard]
    Return:
        A checkbox group listing the open models of the first dataframe of the
        selected leaderboard, with nothing selected
    """
    # Anything other than the text leaderboard name selects the multimodal data
    board_key = "text" if leaderboard == TEXT_NAME else "multimodal"
    board_df = get_github_data()[board_key][0]

    # Model names are read from the first column
    model_names = board_df.iloc[:, 0].unique().tolist()
    open_choices, _ = split_models(model_names)

    return gr.CheckboxGroup(open_choices, value=[], elem_id="value-select-1", interactive=True)
def update_closed_models(leaderboard: str = TEXT_NAME):
    """
    Rebuild the Closed Models checkbox group for the selected leaderboard.

    Args:
        leaderboard: Selected leaderboard from the frontend [Default - Text Leaderboard]
    Return:
        A checkbox group listing the commercial models of the first dataframe
        of the selected leaderboard, with nothing selected
    """
    # Anything other than the text leaderboard name selects the multimodal data
    board_key = "text" if leaderboard == TEXT_NAME else "multimodal"
    board_df = get_github_data()[board_key][0]

    # Model names are read from the first column
    model_names = board_df.iloc[:, 0].unique().tolist()
    _, closed_choices = split_models(model_names)

    return gr.CheckboxGroup(closed_choices, value=[], elem_id="value-select-2", interactive=True)
def get_plot_df(leaderboard: str = TEXT_NAME) -> pd.DataFrame:
    """
    Fetch the dataframe used for plotting the selected leaderboard.

    Args:
        leaderboard: Selected leaderboard.
    Returns:
        The first dataframe stored for the selected leaderboard
        ("text" when it equals TEXT_NAME, "multimodal" otherwise).
    """
    board_key = "text" if leaderboard == TEXT_NAME else "multimodal"
    board_frames = get_github_data()[board_key]
    return board_frames[0]
+"""
+Reset Functions for when the Leaderboard selection changes
+"""
def reset_show_all():
    """Return the "Select All Models" checkbox group with nothing selected."""
    choices = ["Select All Models"]
    return gr.CheckboxGroup(
        choices,
        label="Show plot for all models 🤖",
        value=[],
        elem_id="value-select-3",
        interactive=True,
    )
def reset_show_names():
    """Return the "Show Names" checkbox group with nothing selected."""
    choices = ["Show Names"]
    return gr.CheckboxGroup(
        choices,
        label="Show names of models on the plot 🏷️",
        value=[],
        elem_id="value-select-4",
        interactive=True,
    )
def reset_show_legend():
    """Return the "Show Legend" checkbox group with nothing selected."""
    choices = ["Show Legend"]
    return gr.CheckboxGroup(
        choices,
        label="Show legend on the plot 💡",
        value=[],
        elem_id="value-select-5",
        interactive=True,
    )
def reset_mobile_view():
    """Return the "Mobile View" checkbox group with nothing selected."""
    choices = ["Mobile View"]
    return gr.CheckboxGroup(
        choices,
        label="View plot on smaller screens 📱",
        value=[],
        elem_id="value-select-6",
        interactive=True,
    )
if __name__ == '__main__':
    # Manual smoke test: classify a fixed list of multimodal models and print
    # the resulting open / closed split.
    mm_model_list = [
        'gpt-4o-2024-05-13', 'gpt-4-1106-vision-preview', 'claude-3-opus-20240229',
        'gemini-1.5-pro-latest', 'gemini-1.5-flash-latest', 'llava-v1.6-34b-hf',
        'llava-v1.6-vicuna-13b-hf', 'idefics-80b-instruct', 'llava-1.5-13b-hf',
        'idefics-9b-instruct',
    ]
    # Text leaderboard models; defined for reference, not classified below.
    text_model_list = [
        'vicuna-33b-v1.3', 'gpt-4-0125-preview', 'gpt-4-turbo-2024-04-09',
        'claude-3-5-sonnet-20240620', 'gpt-4-1106-preview', 'gpt-4-0613',
        'gpt-4o-2024-05-13', 'claude-3-opus-20240229', 'gemini-1.5-pro-latest',
        'Meta-Llama-3-70B-Instruct-hf', 'claude-2.1', 'gemini-1.5-flash-latest',
        'claude-3-sonnet-20240229', 'Qwen1.5-72B-Chat', 'mistral-large-2402',
        'gpt-3.5-turbo-0125', 'gemini-1.0-pro', 'command-r-plus', 'openchat_3.5',
        'claude-3-haiku-20240307', 'sheep-duck-llama-2-70b-v1.1',
        'Meta-Llama-3-8B-Instruct-hf', 'openchat-3.5-1210', 'WizardLM-70b-v1.0',
        'openchat-3.5-0106', 'Qwen1.5-14B-Chat', 'mistral-medium-2312',
        'Qwen1.5-32B-Chat', 'codegemma-7b-it', 'dolphin-2.5-mixtral-8x7b',
        'CodeLlama-34b-Instruct-hf', 'command-r', 'gemma-1.1-7b-it', 'SUS-Chat-34B',
        'Mixtral-8x22B-Instruct-v0.1', 'tulu-2-dpo-70b',
        'Nous-Hermes-2-Mixtral-8x7B-SFT', 'WizardLM-13b-v1.2',
        'Mistral-7B-Instruct-v0.2', 'Yi-34B-Chat', 'Mixtral-8x7B-Instruct-v0.1',
        'Mistral-7B-Instruct-v0.1', 'Yi-1.5-34B-Chat', 'vicuna-13b-v1.5',
        'Yi-1.5-6B-Chat', 'Starling-LM-7B-beta', 'sheep-duck-llama-2-13b',
        'Yi-1.5-9B-Chat', 'gemma-1.1-2b-it', 'Qwen1.5-7B-Chat', 'gemma-7b-it',
        'llama-2-70b-chat-hf', 'Qwen1.5-0.5B-Chat', 'Qwen1.5-1.8B-Chat',
    ]

    open_split, closed_split = split_models(mm_model_list)
    for heading, models in (("Open", open_split), ("Closed", closed_split)):
        print(heading)
        print(models)

src/version_utils.py ADDED Viewed

	@@ -0,0 +1,95 @@

+## REQUIRED OUTPUT ###
+# A list of version names -> v1.6, v1.6_multimodal, v1.6_quantized, v1.5, v0.9, etc......
+# A corresponding DataFrame?
+import requests
+from datetime import datetime
+import pandas as pd
+import json
+from io import StringIO
+from src.leaderboard_utils import process_df
+from src.assets.text_content import REPO
def _version_sort_key(version: str):
    """Turn a name such as "v1.10" into (1, 10) so versions sort numerically per component."""
    return tuple(int(part) for part in version[1:].split('.'))


def _fetch_results_df(url: str, timeout: float = 30):
    """Download one results CSV and return (dataframe or None, HTTP status code).

    On a 200 response the CSV is run through process_df and sorted by its
    second column (the clemscore column) in descending order.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None, response.status_code
    df = process_df(pd.read_csv(StringIO(response.text)))
    return df.sort_values(by=df.columns[1], ascending=False), response.status_code


def get_versions_data():
    """
    Read and process data from CSV files of all available versions hosted on GitHub. - https://github.com/clembench/clembench-runs

    Returns:
        versions_data: A dict, or None when benchmark_runs.json cannot be read
        or lists no versions. The dict contains:
            - "latest": name of the latest version
            - "date": last-updated date of the latest version, formatted like "03 Jul 2024"
            - "<version>", "<version>_multimodal", "<version>_quantized":
              processed dataframes sorted by clemscore, present only for the
              result files that could be downloaded
    """
    base_repo = REPO
    json_url = base_repo + "benchmark_runs.json"
    response = requests.get(json_url, timeout=30)

    # Check if the JSON file request was successful
    if response.status_code != 200:
        print(f"Failed to read JSON file: Status Code: {response.status_code}")
        return None

    versions = response.json()['versions']
    if not versions:  # nothing to pick a latest version from
        print("Failed to read versions: benchmark_runs.json lists no versions")
        return None

    # Sort version names - latest first.
    # Components are compared as integers, so "v1.10" ranks above "v1.9".
    version_names = sorted(
        [ver['version'] for ver in versions],
        key=_version_sort_key,
        reverse=True
    )
    print(f"Found {len(version_names)} versions from get_versions_data(): {version_names}.")

    # Get Last updated date of the latest version
    latest_version = version_names[0]
    latest_date = next(
        ver['date'] for ver in versions if ver['version'] == latest_version
    )
    formatted_date = datetime.strptime(latest_date, "%Y/%m/%d").strftime("%d %b %Y")

    # Get Versions data
    versions_data = {"latest": latest_version, "date": formatted_date}

    for version in version_names:
        # Text Data
        text_df, status = _fetch_results_df(f"{base_repo}{version}/results.csv")
        if text_df is not None:
            versions_data[version] = text_df
        else:
            print(f"Failed to read Text-only leaderboard CSV file for version: {version}. Status Code: {status}")

        # Multimodal and Quantized Data (optional per version)
        for variant in ("multimodal", "quantized"):
            variant_df, status = _fetch_results_df(f"{base_repo}{version}_{variant}/results.csv")
            if variant_df is not None:
                versions_data[f"{version}_{variant}"] = variant_df
            else:
                # Report the status of the request that actually failed
                print(f"Failed to read {variant} leaderboard CSV file for version: {version}: Status Code: {status}. Please ignore this message if {variant} results are not available for this version")

    return versions_data
if __name__ == "__main__":
    # Manual check: list which versions / leaderboards were loaded.
    versions_data = get_versions_data()
    # The failure path of get_versions_data() does not return a dict, so only
    # call .keys() when a dict actually came back.
    if isinstance(versions_data, dict):
        print(versions_data.keys())
    else:
        print("No versions data available.")