KennyUTC committed on
Commit
1b0ca05
·
1 Parent(s): 86d69b4

update leaderboard

Browse files
Files changed (1) hide show
  1. app.py +104 -159
app.py CHANGED
@@ -16,6 +16,107 @@ head_style = """
16
  </style>
17
  """
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  with gr.Blocks(title="Math Leaderboard", head=head_style) as demo:
20
  results = load_results()['results']
21
  N_MODEL = len(results)
@@ -32,84 +133,8 @@ with gr.Blocks(title="Math Leaderboard", head=head_style) as demo:
32
 
33
  with gr.Tabs(elem_classes='tab-buttons') as tabs:
34
  with gr.TabItem('🏅 LMM Math Leaderboard', elem_id='main', id=0):
35
- _, check_box = BUILD_L1_DF(results)
36
-
37
- table = generate_table(results)
38
- table['Rank'] = list(range(1, len(table) + 1))
39
-
40
- type_map = check_box['type_map']
41
- type_map['Rank'] = 'number'
42
-
43
- checkbox_group = gr.CheckboxGroup(
44
- choices=check_box['all'],
45
- value=check_box['required'],
46
- label='Evaluation Dimension',
47
- interactive=True,
48
- )
49
-
50
- headers = ['Rank'] + check_box['essential'] + checkbox_group.value
51
- with gr.Row():
52
- model_name = gr.Textbox(
53
- value='Input the Model Name (fuzzy)',
54
- label='Model Name',
55
- interactive=True,
56
- visible=True)
57
- model_size = gr.CheckboxGroup(
58
- choices=MODEL_SIZE,
59
- value=MODEL_SIZE,
60
- label='Model Size',
61
- interactive=True
62
- )
63
- model_type = gr.CheckboxGroup(
64
- choices=MODEL_TYPE,
65
- value=MODEL_TYPE,
66
- label='Model Type',
67
- interactive=True
68
- )
69
-
70
- data_component = gr.components.DataFrame(
71
- value=table[headers],
72
- type='pandas',
73
- datatype=[type_map[x] for x in headers],
74
- interactive=False,
75
- visible=True)
76
-
77
- def filter_df(fields, model_name, model_size, model_type):
78
- results = load_results()['results']
79
- headers = ['Rank'] + check_box['essential'] + fields
80
-
81
- df = generate_table(results)
82
-
83
- df['flag'] = [model_size_flag(x, model_size) for x in df['Param (B)']]
84
- df = df[df['flag']]
85
- df.pop('flag')
86
- if len(df):
87
- df['flag'] = [model_type_flag(df.iloc[i], model_type) for i in range(len(df))]
88
- df = df[df['flag']]
89
- df.pop('flag')
90
- df['Rank'] = list(range(1, len(df) + 1))
91
- default_val = 'Input the Model Name (fuzzy)'
92
- if model_name != default_val:
93
- print(model_name)
94
- model_name = model_name.lower()
95
- method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Method']]
96
- flag = [model_name in name for name in method_names]
97
- df['TEMP_FLAG'] = flag
98
- df = df[df['TEMP_FLAG'] == True]
99
- df.pop('TEMP_FLAG')
100
-
101
- comp = gr.components.DataFrame(
102
- value=df[headers],
103
- type='pandas',
104
- datatype=[type_map[x] for x in headers],
105
- interactive=False,
106
- visible=True)
107
- return comp
108
-
109
- for cbox in [checkbox_group, model_size, model_type]:
110
- cbox.change(fn=filter_df, inputs=[checkbox_group, model_name, model_size, model_type], outputs=data_component)
111
- model_name.submit(fn=filter_df, inputs=[checkbox_group, model_name, model_size, model_type], outputs=data_component)
112
-
113
  for i, dataset in enumerate(DATASETS):
114
  tab_name_map = {
115
  'MathVista': 'MathVista (Test Mini)',
@@ -118,87 +143,7 @@ with gr.Blocks(title="Math Leaderboard", head=head_style) as demo:
118
 
119
  with gr.TabItem(
120
  f'📊 {dataset if dataset not in tab_name_map else tab_name_map[dataset]}', elem_id=dataset, id=i + 2):
121
-
122
- s = structs[i]
123
- s.table, s.check_box = BUILD_L2_DF(results, dataset)
124
- s.type_map = s.check_box['type_map']
125
- s.type_map['Rank'] = 'number'
126
-
127
- s.checkbox_group = gr.CheckboxGroup(
128
- choices=s.check_box['all'],
129
- value=s.check_box['required'],
130
- label=f'{dataset} CheckBoxes',
131
- interactive=True,
132
- )
133
- s.headers = ['Rank'] + s.check_box['essential'] + s.checkbox_group.value
134
- s.table['Rank'] = list(range(1, len(s.table) + 1))
135
-
136
- with gr.Row():
137
- s.model_name = gr.Textbox(
138
- value='Input the Model Name (fuzzy)',
139
- label='Model Name',
140
- interactive=True,
141
- visible=True)
142
- s.model_size = gr.CheckboxGroup(
143
- choices=MODEL_SIZE,
144
- value=MODEL_SIZE,
145
- label='Model Size',
146
- interactive=True
147
- )
148
- s.model_type = gr.CheckboxGroup(
149
- choices=MODEL_TYPE,
150
- value=MODEL_TYPE,
151
- label='Model Type',
152
- interactive=True
153
- )
154
- s.data_component = gr.components.DataFrame(
155
- value=s.table[s.headers],
156
- type='pandas',
157
- datatype=[s.type_map[x] for x in s.headers],
158
- interactive=False,
159
- visible=True)
160
- s.dataset = gr.Textbox(value=dataset, label=dataset, visible=False)
161
-
162
- def filter_df_l2(dataset_name, fields, model_name, model_size, model_type):
163
- results = load_results()['results']
164
- s = structs[DATASETS.index(dataset_name)]
165
- headers = ['Rank'] + s.check_box['essential'] + fields
166
- df = cp.deepcopy(s.table)
167
- df['flag'] = [model_size_flag(x, model_size) for x in df['Param (B)']]
168
- df = df[df['flag']]
169
- df.pop('flag')
170
- if len(df):
171
- df['flag'] = [model_type_flag(df.iloc[i], model_type) for i in range(len(df))]
172
- df = df[df['flag']]
173
- df.pop('flag')
174
- df['Rank'] = list(range(1, len(df) + 1))
175
- default_val = 'Input the Model Name (fuzzy)'
176
- if model_name != default_val:
177
- print(model_name)
178
- model_name = model_name.lower()
179
- method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Method']]
180
- flag = [model_name in name for name in method_names]
181
- df['TEMP_FLAG'] = flag
182
- df = df[df['TEMP_FLAG'] == True]
183
- df.pop('TEMP_FLAG')
184
-
185
- comp = gr.components.DataFrame(
186
- value=df[headers],
187
- type='pandas',
188
- datatype=[s.type_map[x] for x in headers],
189
- interactive=False,
190
- visible=True)
191
- return comp
192
-
193
- for cbox in [s.checkbox_group, s.model_size, s.model_type]:
194
- cbox.change(
195
- fn=filter_df_l2,
196
- inputs=[s.dataset, s.checkbox_group, s.model_name, s.model_size, s.model_type],
197
- outputs=s.data_component)
198
- s.model_name.submit(
199
- fn=filter_df_l2,
200
- inputs=[s.dataset, s.checkbox_group, s.model_name, s.model_size, s.model_type],
201
- outputs=s.data_component)
202
 
203
  with gr.Row():
204
  with gr.Accordion('Citation', open=False):
 
16
  </style>
17
  """
18
 
19
+
20
def math_main_tab(results):
    """Build the main leaderboard tab: filter widgets plus the ranked table.

    Creates Gradio components, so it is meant to be called from inside the
    enclosing ``gr.Blocks`` / ``gr.TabItem`` context.

    Args:
        results: The leaderboard results (the ``'results'`` entry returned by
            ``load_results()``), forwarded to ``BUILD_L1_DF`` and
            ``generate_table``.
    """
    # The textbox is pre-filled with this text; while it is unchanged, the
    # name filter is skipped.
    default_val = 'Input the Model Name (fuzzy)'

    _, check_box = BUILD_L1_DF(results)
    table = generate_table(results)
    table['Rank'] = list(range(1, len(table) + 1))
    type_map = check_box['type_map']
    type_map['Rank'] = 'number'

    checkbox_group = gr.CheckboxGroup(
        choices=check_box['all'],
        value=check_box['required'],
        label='Evaluation Dimension',
    )

    headers = ['Rank'] + check_box['essential'] + checkbox_group.value
    with gr.Row():
        model_name = gr.Textbox(value=default_val, label='Model Name')
        model_size = gr.CheckboxGroup(choices=MODEL_SIZE, value=MODEL_SIZE, label='Model Size')
        model_type = gr.CheckboxGroup(choices=MODEL_TYPE, value=MODEL_TYPE, label='Model Type')

    data_component = gr.components.DataFrame(
        value=table[headers],
        datatype=[type_map[x] for x in headers],
    )

    def filter_df(fields, model_name, model_size, model_type):
        """Rebuild the table for the currently selected columns and filters.

        Args:
            fields: Selected optional columns (value of the checkbox group).
            model_name: Text typed in the name box (case-insensitive substring).
            model_size: Selected model-size buckets.
            model_type: Selected model-type buckets.

        Returns:
            A ``gr.components.DataFrame`` holding the filtered rows.
        """
        # Results are reloaded on every event rather than reusing the
        # snapshot taken when the tab was built.
        results = load_results()['results']
        headers = ['Rank'] + check_box['essential'] + fields

        df = generate_table(results)

        # Row filters use boolean masks with .loc instead of writing temporary
        # columns onto filtered slices (avoids SettingWithCopyWarning).
        size_mask = [model_size_flag(x, model_size) for x in df['Param (B)']]
        df = df.loc[size_mask]
        if len(df):
            type_mask = [model_type_flag(df.iloc[i], model_type) for i in range(len(df))]
            df = df.loc[type_mask]

        # Rank is assigned unconditionally so an empty result still has the
        # 'Rank' column that `df[headers]` selects below. Ranks are computed
        # before the name search, so a search keeps each row's rank.
        df = df.assign(Rank=list(range(1, len(df) + 1)))

        if model_name != default_val:
            needle = model_name.lower()
            # 'Method' cells are HTML anchors; keep only the visible link text.
            method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Method']]
            df = df.loc[[needle in name for name in method_names]]

        comp = gr.components.DataFrame(
            value=df[headers],
            datatype=[type_map[x] for x in headers],
        )
        return comp

    filter_inputs = [checkbox_group, model_name, model_size, model_type]
    for cbox in [checkbox_group, model_size, model_type]:
        cbox.change(fn=filter_df, inputs=filter_inputs, outputs=data_component)
    model_name.submit(fn=filter_df, inputs=filter_inputs, outputs=data_component)
65
+
66
+
67
def dataset_tab(results, struct, dataset):
    """Build the per-dataset leaderboard tab and store its widgets on ``struct``.

    Creates Gradio components, so it is meant to be called from inside the
    enclosing ``gr.Blocks`` / ``gr.TabItem`` context.

    Args:
        results: The leaderboard results, forwarded to ``BUILD_L2_DF``.
        struct: Mutable holder for this tab; the table, checkbox metadata and
            every created component are stored on it as attributes.
        dataset: Dataset name, forwarded to ``BUILD_L2_DF`` and used in labels.
    """
    # The textbox is pre-filled with this text; while it is unchanged, the
    # name filter is skipped.
    default_val = 'Input the Model Name (fuzzy)'

    s = struct
    s.table, s.check_box = BUILD_L2_DF(results, dataset)
    s.type_map = s.check_box['type_map']
    s.type_map['Rank'] = 'number'

    s.checkbox_group = gr.CheckboxGroup(
        choices=s.check_box['all'],
        value=s.check_box['required'],
        label=f'{dataset} CheckBoxes',
    )
    s.headers = ['Rank'] + s.check_box['essential'] + s.checkbox_group.value
    s.table['Rank'] = list(range(1, len(s.table) + 1))

    with gr.Row():
        s.model_name = gr.Textbox(value=default_val, label='Model Name')
        s.model_size = gr.CheckboxGroup(choices=MODEL_SIZE, value=MODEL_SIZE, label='Model Size')
        s.model_type = gr.CheckboxGroup(choices=MODEL_TYPE, value=MODEL_TYPE, label='Model Type')

    s.data_component = gr.components.DataFrame(
        value=s.table[s.headers],
        datatype=[s.type_map[x] for x in s.headers],
    )
    # Hidden textbox that passes the dataset name to the callback as an input.
    s.dataset = gr.Textbox(value=dataset, label=dataset, visible=False)

    def filter_df_l2(dataset_name, fields, model_name, model_size, model_type):
        """Filter the stored table of ``dataset_name`` by the current selections.

        Args:
            dataset_name: Dataset whose struct is looked up in ``structs``.
            fields: Selected optional columns (value of the checkbox group).
            model_name: Text typed in the name box (case-insensitive substring).
            model_size: Selected model-size buckets.
            model_type: Selected model-type buckets.

        Returns:
            A ``gr.components.DataFrame`` holding the filtered rows.
        """
        # No load_results() call here: the filter works on the table already
        # stored on the struct, so the reloaded results were never used.
        s = structs[DATASETS.index(dataset_name)]
        headers = ['Rank'] + s.check_box['essential'] + fields
        # Work on a copy so the table stored on the struct is never mutated.
        df = cp.deepcopy(s.table)

        # Row filters use boolean masks with .loc instead of writing temporary
        # columns onto filtered slices (avoids SettingWithCopyWarning).
        size_mask = [model_size_flag(x, model_size) for x in df['Param (B)']]
        df = df.loc[size_mask]
        if len(df):
            type_mask = [model_type_flag(df.iloc[i], model_type) for i in range(len(df))]
            df = df.loc[type_mask]

        # Re-rank the surviving rows. This happens before the name search, so
        # a search keeps each row's rank. Assigned unconditionally so empty
        # results are handled the same way as non-empty ones.
        df = df.assign(Rank=list(range(1, len(df) + 1)))

        if model_name != default_val:
            needle = model_name.lower()
            # 'Method' cells are HTML anchors; keep only the visible link text.
            method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Method']]
            df = df.loc[[needle in name for name in method_names]]

        comp = gr.components.DataFrame(
            value=df[headers],
            datatype=[s.type_map[x] for x in headers],
        )
        return comp

    filter_inputs = [s.dataset, s.checkbox_group, s.model_name, s.model_size, s.model_type]
    for cbox in [s.checkbox_group, s.model_size, s.model_type]:
        cbox.change(fn=filter_df_l2, inputs=filter_inputs, outputs=s.data_component)
    s.model_name.submit(fn=filter_df_l2, inputs=filter_inputs, outputs=s.data_component)
118
+
119
+
120
  with gr.Blocks(title="Math Leaderboard", head=head_style) as demo:
121
  results = load_results()['results']
122
  N_MODEL = len(results)
 
133
 
134
  with gr.Tabs(elem_classes='tab-buttons') as tabs:
135
  with gr.TabItem('🏅 LMM Math Leaderboard', elem_id='main', id=0):
136
+ math_main_tab(results)
137
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  for i, dataset in enumerate(DATASETS):
139
  tab_name_map = {
140
  'MathVista': 'MathVista (Test Mini)',
 
143
 
144
  with gr.TabItem(
145
  f'📊 {dataset if dataset not in tab_name_map else tab_name_map[dataset]}', elem_id=dataset, id=i + 2):
146
+ dataset_tab(results, structs[i], dataset)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  with gr.Row():
149
  with gr.Accordion('Citation', open=False):