multimodal-clem-leaderboard

Running

App Files Files Community

sherzod-hakimov committed on Apr 22, 2024

Commit

69c36b6

1 Parent(s): b345ff4

update page loading

Browse files

Files changed (3) hide show

app.py +32 -12
src/assets/text_content.py +5 -1
src/plot_utils.py +24 -3

app.py CHANGED Viewed

@@ -1,10 +1,11 @@
 import gradio as gr
-from src.assets.text_content import TITLE, INTRODUCTION_TEXT
 from src.leaderboard_utils import filter_search, get_github_data
 from src.plot_utils import split_models, compare_plots
 # For Leaderboards
 # Get CSV data
 global primary_leaderboard_df, version_dfs, version_names
 primary_leaderboard_df, version_dfs, version_names = get_github_data()
@@ -38,19 +39,22 @@ with main_app:
                     elem_id="search-bar",
                 )
-            leaderboard_table = gr.components.Dataframe(
                 value=primary_leaderboard_df[0],
                 elem_id="leaderboard-table",
                 interactive=False,
                 visible=True,
             )
             # Add a dummy leaderboard to handle search queries from the primary_leaderboard_df and not update primary_leaderboard_df
-            dummy_leaderboard_table = gr.components.Dataframe(
                 value=primary_leaderboard_df[0],
                 elem_id="leaderboard-table",
                 interactive=False,
-                visible=False,
             )
             search_bar.submit(
@@ -106,6 +110,14 @@ with main_app:
                         elem_id="value-select-5",
                         interactive=True,
                     )
             with gr.Row():
                 dummy_plot_df = gr.DataFrame(
@@ -120,35 +132,42 @@ with main_app:
             open_models_selection.change(
                 compare_plots,
-                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend],
                 plot_output,
                 queue=True
             )
             closed_models_selection.change(
                 compare_plots,
-                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend],
                 plot_output,
                 queue=True
             )
             show_all.change(
                 compare_plots,
-                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend],
                 plot_output,
                 queue=True
             )
             show_names.change(
                 compare_plots,
-                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend],
                 plot_output,
                 queue=True
             )
             show_legend.change(
                 compare_plots,
-                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend],
                 plot_output,
                 queue=True
             )
@@ -165,18 +184,19 @@ with main_app:
                     elem_id="search-bar-2",
                 )
-            prev_table = gr.components.Dataframe(
                 value=prev_df,
                 elem_id="leaderboard-table",
                 interactive=False,
                 visible=True,
             )
-            dummy_prev_table = gr.components.Dataframe(
                 value=prev_df,
                 elem_id="leaderboard-table",
                 interactive=False,
-                visible=False,
             )
             search_bar_prev.submit(

 import gradio as gr
+from src.assets.text_content import TITLE, INTRODUCTION_TEXT, CLEMSCORE_TEXT
 from src.leaderboard_utils import filter_search, get_github_data
 from src.plot_utils import split_models, compare_plots
 # For Leaderboards
+dataframe_height = 800 # Height of the table in pixels
 # Get CSV data
 global primary_leaderboard_df, version_dfs, version_names
 primary_leaderboard_df, version_dfs, version_names = get_github_data()
                     elem_id="search-bar",
                 )
+            leaderboard_table = gr.Dataframe(
                 value=primary_leaderboard_df[0],
                 elem_id="leaderboard-table",
                 interactive=False,
                 visible=True,
+                height=dataframe_height
             )
+            gr.HTML(CLEMSCORE_TEXT)
             # Add a dummy leaderboard to handle search queries from the primary_leaderboard_df and not update primary_leaderboard_df
+            dummy_leaderboard_table = gr.Dataframe(
                 value=primary_leaderboard_df[0],
                 elem_id="leaderboard-table",
                 interactive=False,
+                visible=False
             )
             search_bar.submit(
                         elem_id="value-select-5",
                         interactive=True,
                     )
+                with gr.Column():
+                    mobile_view = gr.CheckboxGroup(
+                        ["Mobile View"],
+                        label ="View plot on smaller screens 📱",
+                        value=[],
+                        elem_id="value-select-6",
+                        interactive=True,
+                    )
             with gr.Row():
                 dummy_plot_df = gr.DataFrame(
             open_models_selection.change(
                 compare_plots,
+                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend, mobile_view],
                 plot_output,
                 queue=True
             )
             closed_models_selection.change(
                 compare_plots,
+                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend, mobile_view],
                 plot_output,
                 queue=True
             )
             show_all.change(
                 compare_plots,
+                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend, mobile_view],
                 plot_output,
                 queue=True
             )
             show_names.change(
                 compare_plots,
+                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend, mobile_view],
                 plot_output,
                 queue=True
             )
             show_legend.change(
                 compare_plots,
+                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend, mobile_view],
+                plot_output,
+                queue=True
+            )
+            mobile_view.change(
+                compare_plots,
+                [dummy_plot_df, open_models_selection, closed_models_selection, show_all, show_names, show_legend, mobile_view],
                 plot_output,
                 queue=True
             )
                     elem_id="search-bar-2",
                 )
+            prev_table = gr.Dataframe(
                 value=prev_df,
                 elem_id="leaderboard-table",
                 interactive=False,
                 visible=True,
+                height=dataframe_height
             )
+            dummy_prev_table = gr.Dataframe(
                 value=prev_df,
                 elem_id="leaderboard-table",
                 interactive=False,
+                visible=False
             )
             search_bar_prev.submit(

src/assets/text_content.py CHANGED Viewed

@@ -4,13 +4,17 @@ INTRODUCTION_TEXT = """
 <h6 align="center">
 The CLEM Leaderboard aims to track, rank and evaluate current cLLMs (chat-optimized Large Language Models) with the suggested pronounciation “clems”.
-The benchmarking approach is described in [Clembench: Using Game Play to Evaluate Chat-Optimized Language Models as Conversational Agents](https://arxiv.org/abs/2305.13455).
 Source code for benchmarking "clems" is available here: [Clembench](https://github.com/clembench/clembench)
 All generated files and results from the benchmark runs are available here: [clembench-runs](https://github.com/clembench/clembench-runs) </h6>
 """
 SHORT_NAMES = {
     "t0.0": "",
     "claude-v1.3": "cl-1.3",

 <h6 align="center">
 The CLEM Leaderboard aims to track, rank and evaluate current cLLMs (chat-optimized Large Language Models) with the suggested pronounciation “clems”.
+The benchmarking approach is described in [Clembench: Using Game Play to Evaluate Chat-Optimized Language Models as Conversational Agents](https://aclanthology.org/2023.emnlp-main.689.pdf).
 Source code for benchmarking "clems" is available here: [Clembench](https://github.com/clembench/clembench)
 All generated files and results from the benchmark runs are available here: [clembench-runs](https://github.com/clembench/clembench-runs) </h6>
 """
+CLEMSCORE_TEXT = """
+The <i>clemscore</i> combines a score representing the overall ability to just follow the game instructions (separately scored in field <i>Played</i>) and the quality of the play in attempt where instructions were followed (field <i>Quality Scores</i>). For details about the games / interaction settings, and for results on older versions of the benchmark, see the tab <i>Versions and Details</i>.
+"""
 SHORT_NAMES = {
     "t0.0": "",
     "claude-v1.3": "cl-1.3",

src/plot_utils.py CHANGED Viewed

@@ -3,7 +3,7 @@ import plotly.express as px
 from src.assets.text_content import SHORT_NAMES
-def plotly_plot(df:pd.DataFrame, LIST:list, ALL:list, NAMES:list, LEGEND:list):
     '''
     Takes in a list of models for a plotly plot
     Args:
@@ -12,6 +12,7 @@ def plotly_plot(df:pd.DataFrame, LIST:list, ALL:list, NAMES:list, LEGEND:list):
         ALL: Either [] or ["Show All Models"] - toggle view to plot all models
         NAMES: Either [] or ["Show Names"] - toggle view to show model names on plot
         LEGEND: Either [] or ["Show Legend"] - toggle view to show legend on plot
     Returns:
         Fig: plotly figure
     '''
@@ -52,11 +53,30 @@ def plotly_plot(df:pd.DataFrame, LIST:list, ALL:list, NAMES:list, LEGEND:list):
     fig.update_xaxes(range=[-5, 105])
     fig.update_yaxes(range=[-5, 105])
     return fig
 # ['Model', 'Clemscore', 'All(Played)', 'All(Quality Score)']
-def compare_plots(df: pd.DataFrame, LIST1: list, LIST2: list, ALL:list, NAMES:list, LEGEND: list):
     '''
     Quality Score v/s % Played plot by selecting models
     Args:
@@ -66,13 +86,14 @@ def compare_plots(df: pd.DataFrame, LIST1: list, LIST2: list, ALL:list, NAMES:li
         ALL: Either [] or ["Show All Models"] - toggle view to plot all models
         NAMES: Either [] or ["Show Names"] - toggle view to show model names on plot
         LEGEND: Either [] or ["Show Legend"] - toggle view to show legend on plot
     Returns:
         fig: The plot
     '''
     # Combine lists for Open source and commercial models
     LIST = LIST1 + LIST2
-    fig = plotly_plot(df, LIST, ALL, NAMES, LEGEND)
     return fig

 from src.assets.text_content import SHORT_NAMES
+def plotly_plot(df:pd.DataFrame, LIST:list, ALL:list, NAMES:list, LEGEND:list, MOBILE:list ):
     '''
     Takes in a list of models for a plotly plot
     Args:
         ALL: Either [] or ["Show All Models"] - toggle view to plot all models
         NAMES: Either [] or ["Show Names"] - toggle view to show model names on plot
         LEGEND: Either [] or ["Show Legend"] - toggle view to show legend on plot
+        MOBILE: Either [] or ["Mobile View"] - toggle a compact plot layout for smaller screens
     Returns:
         Fig: plotly figure
     '''
     fig.update_xaxes(range=[-5, 105])
     fig.update_yaxes(range=[-5, 105])
+    if MOBILE:
+        fig.update_layout(height=300)
+    if MOBILE and LEGEND:
+        fig.update_layout(height=450)
+        fig.update_layout(legend=dict(
+            yanchor="bottom",
+            y=-5.52,
+            xanchor="left",
+            x=0.01
+        ))
+        fig.update_layout(
+            xaxis_title="",
+            yaxis_title="",
+            title="% Played v/s Quality Score"
+        )
     return fig
 # ['Model', 'Clemscore', 'All(Played)', 'All(Quality Score)']
+def compare_plots(df: pd.DataFrame, LIST1: list, LIST2: list, ALL:list, NAMES:list, LEGEND: list, MOBILE: list):
     '''
     Quality Score v/s % Played plot by selecting models
     Args:
         ALL: Either [] or ["Show All Models"] - toggle view to plot all models
         NAMES: Either [] or ["Show Names"] - toggle view to show model names on plot
         LEGEND: Either [] or ["Show Legend"] - toggle view to show legend on plot
+        MOBILE: Either [] or ["Mobile View"] - toggle a compact plot layout for smaller screens
     Returns:
         fig: The plot
     '''
     # Combine lists for Open source and commercial models
     LIST = LIST1 + LIST2
+    fig = plotly_plot(df, LIST, ALL, NAMES, LEGEND, MOBILE)
     return fig