import copy as cp
import json
from collections import defaultdict
from urllib.request import urlopen

import gradio as gr
import numpy as np
import pandas as pd

from meta_data import META_FIELDS, URL, DATASETS_ALL, DATASETS_ESS


def listinstr(lst, s):
    """Return True if any element of `lst` appears as a substring of `s`."""
    assert isinstance(lst, list)
    for item in lst:
        if item in s:
            return True
    return False


def upper_key(k):
    """Prettify a metadata key: 'ocr' -> 'OCR', 'foo_bar' -> 'Foo Bar'."""
    if k == 'ocr':
        return 'OCR'
    elif '_' in k:
        parts = k.split('_')
        parts = [x[0].upper() + x[1:] for x in parts]
        return ' '.join(parts)
    else:
        return k


def load_results():
    """Fetch the leaderboard results JSON from the remote URL."""
    data = json.loads(urlopen(URL).read())
    return data


def nth_large(val, vals):
    """Return the 1-based rank of `val` among `vals` (1 = largest)."""
    return sum(1 for v in vals if v > val) + 1


def model_size_flag(sz, FIELDS):
    """Return True if a model of size `sz` (in billions of parameters) falls into
    any of the size buckets selected in `FIELDS`."""
    if pd.isna(sz) and 'Unknown' in FIELDS:
        return True
    if pd.isna(sz):
        return False
    sz = int(sz)
    if '<4B' in FIELDS and sz < 4:
        return True
    if '4B-10B' in FIELDS and 4 <= sz < 10:
        return True
    if '10B-20B' in FIELDS and 10 <= sz < 20:
        return True
    if '20B-40B' in FIELDS and 20 <= sz < 40:
        return True
    if '>40B' in FIELDS and sz >= 40:
        return True
    return False


def model_type_flag(line, FIELDS):
    """Return True if the model type of `line` (open-source vs. API) matches the selected `FIELDS`."""
    if 'OpenSource' in FIELDS and line['OpenSource'] == 'Yes':
        return True
    if 'API' in FIELDS and line['OpenSource'] == 'No':
        return True
    return False


def BUILD_L1_DF(results):
    """Build the main (L1) leaderboard DataFrame and its column check-box config."""
    check_box = {}
    check_box['essential'] = ['Method', 'Org', 'Param (B)', 'Language Model', 'Vision Model']
    check_box['required'] = ['Overall'] + DATASETS_ESS
    check_box['all'] = ['Overall'] + DATASETS_ALL
    type_map = defaultdict(lambda: 'number')
    type_map['Method'] = 'html'
    type_map['Language Model'] = type_map['Vision Model'] = type_map['Org'] = 'html'
    type_map['OpenSource'] = type_map['Verified'] = 'str'
    check_box['type_map'] = type_map

    df = generate_table(results)
    return df, check_box


def BUILD_L2_DF(results, dataset):
    """Build the per-dataset (L2) leaderboard DataFrame and its column check-box config."""
    res = defaultdict(list)
    sub = [v for v in results.values() if dataset in v]
    assert len(sub), dataset
    fields = list(sub[0][dataset].keys())

    non_overall_fields = [x for x in fields if 'Overall' not in x]
    overall_fields = [x for x in fields if 'Overall' in x]

    for m in results:
        item = results[m]
        if dataset not in item:
            continue
        # Meta columns: parameter count, linked method name, and other metadata.
        for k in META_FIELDS:
            if k == 'Param (B)':
                param = item['META']['Parameters']
                res[k].append(float(param.replace('B', '')) if param != '' else None)
            elif k == 'Method':
                name, url = item['META']['Method']
                res[k].append(f'<a href="{url}">{name}</a>')
            else:
                s = item['META'][k].replace('\n', '<br>')
                s = s.replace(' & ', '<br>')
                res[k].append(s)

        # Dataset columns: overall scores first, then the per-split scores.
        for d in overall_fields:
            res[d].append(float(item[dataset][d]))
        for d in non_overall_fields:
            res[d].append(float(item[dataset][d]))

    df = pd.DataFrame(res)
    all_fields = overall_fields + non_overall_fields

    required_fields = overall_fields if len(overall_fields) else non_overall_fields[:5]

    # Sort descending by the overall score.
    df = df.sort_values('Overall')
    df = df.iloc[::-1]

    check_box = {}
    check_box['essential'] = ['Method', 'Org', 'Param (B)', 'Language Model', 'Vision Model']
    check_box['required'] = required_fields
    check_box['all'] = all_fields
    type_map = defaultdict(lambda: 'number')
    type_map['Method'] = 'html'
    type_map['Language Model'] = type_map['Vision Model'] = type_map['Org'] = 'html'
    type_map['OpenSource'] = type_map['Verified'] = 'str'
    check_box['type_map'] = type_map
    return df, check_box


def generate_table(results):
    """Assemble the L1 leaderboard table: meta columns, per-dataset overall scores,
    and an averaged 'Overall' column over the essential datasets."""
    res = defaultdict(list)
    for m in results:
        item = results[m]
        avg = 0
        # Meta columns: parameter count, linked method name, and other metadata.
        for k in META_FIELDS:
            if k == 'Param (B)':
                param = item['META']['Parameters']
                res[k].append(float(param.replace('B', '')) if param != '' else None)
            elif k == 'Method':
                name, url = item['META']['Method']
                res[k].append(f'<a href="{url}">{name}</a>')
            else:
                s = item['META'][k].replace('\n', '<br>')
                s = s.replace(' & ', '<br>')
                res[k].append(s)

        # One column per dataset, holding its 'Overall' score (None if missing).
        for d in DATASETS_ALL:
            key_name = 'Overall'
            if d in item:
                val = float(item[d][key_name])
                val = float(f'{val:.1f}')
                res[d].append(val)
            else:
                res[d].append(None)
            # The leaderboard 'Overall' is the mean over the essential datasets;
            # it stays None if any essential dataset is missing.
            if d in DATASETS_ESS:
                if d in item and avg is not None:
                    avg += res[d][-1]
                else:
                    avg = None

        if avg is not None:
            avg = float(f'{avg / len(DATASETS_ESS):.1f}')

        res['Overall'].append(avg)

    df = pd.DataFrame(res)
    # Models with an 'Overall' score come first, sorted by it in descending order;
    # the remaining models are appended, sorted by their MathVista score.
    overall_isna = df[pd.isna(df['Overall'])]
    overall_notna = df[~pd.isna(df['Overall'])]
    overall_notna = overall_notna.sort_values('Overall')
    overall_notna = overall_notna.iloc[::-1]
    overall_isna = overall_isna.sort_values('MathVista')
    overall_isna = overall_isna.iloc[::-1]
    df = pd.concat([overall_notna, overall_isna])

    return df
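

# A minimal usage sketch, not part of the original module: it assumes `URL` in
# meta_data points at a reachable results JSON and that DATASETS_ALL is a
# non-empty list whose first entry has per-model scores with an 'Overall' key.
if __name__ == '__main__':
    results = load_results()
    # Main leaderboard: one row per model, one column per dataset's overall score.
    l1_df, l1_boxes = BUILD_L1_DF(results)
    print(l1_df.head())
    # Per-dataset leaderboard for the first known dataset (placeholder choice;
    # substitute any key from DATASETS_ALL).
    l2_df, l2_boxes = BUILD_L2_DF(results, DATASETS_ALL[0])
    print(l2_df.head())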