# app.py
import json
import streamlit as st
import glob
import os
from datetime import datetime
#st.set_page_config(layout="wide")
st.title('Meta Open LLM leaderboard')

# Each entry of ./data is offered in the date selector further down, which
# parses its name as "%Y%m%d_%H%M" and pre-selects the LAST entry of this list.
# os.listdir() returns names in arbitrary order, so sort them: with that
# timestamp format, lexicographic order is chronological order and "last"
# becomes "most recent". Non-directory entries (stray files) are skipped
# because their names would not parse as dates.
directories = sorted(
    entry
    for entry in os.listdir("./data")
    if os.path.isdir(os.path.join("./data", entry))
)
#data_dir = directories[0]
def format_dir_date(data_dir: str) -> str:
    """Turn a snapshot directory name into a human-readable timestamp.

    Args:
        data_dir: Name in ``%Y%m%d_%H%M`` form, e.g. ``"20231005_1430"``.

    Returns:
        The same date and time formatted as ``"%b %d, %Y %H:%M"``,
        e.g. ``"Oct 05, 2023 14:30"``.

    Raises:
        ValueError: If ``data_dir`` does not match ``%Y%m%d_%H%M``.
    """
    parsed_date = datetime.strptime(data_dir, "%Y%m%d_%H%M")
    return parsed_date.strftime("%b %d, %Y %H:%M")
# Without any snapshot there is nothing to select and nothing to render;
# show a readable message and end this script run instead of continuing
# without a selected directory.
if not directories:
    st.error("No leaderboard snapshots found in ./data.")
    st.stop()

# Let the user pick a snapshot; the last entry of the list is pre-selected.
data_dir = st.selectbox(
    'Select different Date',
    directories,
    format_func=format_dir_date,
    index=len(directories) - 1,
)
# Captions for images, keyed by image file name. Every key follows the
# pattern "hg_average_to_<slug>_compare.png" and every caption the pattern
# "HF to <label> compare", so only the varying parts are listed.
captions_map = {
    f"hg_average_to_{slug}_compare.png": f"HF to {label} compare"
    for slug, label in (
        ("agentbench", "AgentBench"),
        ("opencompass", "OpenCompass"),
        ("mt_bench", "MT-Bench"),
        ("mosaic", "MosaicML"),
        ("alpacaeval", "AlpacaEval"),
    )
}
# Show the formatted date of the selected snapshot, then a separator.
st.write(f"Generated on: {format_dir_date(data_dir)}", unsafe_allow_html=True)
st.divider()


def _is_hf_llm_diagram(path):
    """Return True when the file name of *path* contains "hf_llm_diagram"."""
    return 'hf_llm_diagram' in os.path.basename(path)


# All PNG files directly inside the selected snapshot directory.
data_path = './data/' + data_dir
imgs = glob.glob(os.path.join(data_path, '*.png'))

# Split the images into the "hf_llm_diagram" ones and all the others.
# NOTE(review): glob does not guarantee any ordering, yet hf_llm_diagrams is
# indexed by position later in this script - confirm the intended order.
hf_llm_diagrams = [path for path in imgs if _is_hf_llm_diagram(path)]
remaining_imgs = [path for path in imgs if not _is_hf_llm_diagram(path)]
def _format_model_list(model_ids):
    """Build the bulleted list text: one "- <id>" line per model id."""
    bullets = []
    for model_id in model_ids:
        # Ids longer than 35 characters are shown as "..." plus their
        # last 35 characters.
        shown = '...' + model_id[-35:] if len(model_id) > 35 else model_id
        bullets.append(f'- {shown}\n')
    # NOTE(review): the string literals in this function contained raw line
    # breaks in the source; they are written as "\n" escapes here. Confirm
    # that no HTML markup was originally intended.
    return "\n" + "".join(bullets) + "\n"


def print_model_list(file_name, st):
    """Write the model ids stored next to an image as a bulleted list.

    Args:
        file_name: Path of the image. The ids are read from the file with
            the same path but a ``.json`` extension; it is expected to
            hold a JSON array of strings.
        st: Object whose ``write`` method receives the text. Callers in
            this file pass the ``streamlit`` module or one of its columns.
            Note that this parameter shadows the module-level ``st``.

    Raises:
        OSError: If the ``.json`` file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # splitext swaps the extension whatever its length (".png", ".jpeg", ...)
    # instead of assuming it is exactly four characters long.
    file_path = os.path.splitext(file_name)[0] + '.json'
    with open(file_path, 'r', encoding='utf-8') as file:
        model_ids = json.load(file)
    st.write(_format_model_list(model_ids), unsafe_allow_html=True)
# Section: "hf_llm_diagram" images, each followed by its model list.
st.write("HuggingFace Open LLM leaderboard by Model Size")

# First diagram at full width, written directly to the page.
overall_diagram = hf_llm_diagrams[0]
st.image(overall_diagram, use_column_width="auto")
print_model_list(overall_diagram, st)

# Second and third diagrams side by side, one per column, each with its
# caption and its model list rendered inside the same column.
cols = st.columns(2)
licensed_views = (
    (0, 1, "Other or commercially permissive licenses only"),
    (1, 2, "Commercially permissive license only"),
)
for col_idx, diagram_idx, caption in licensed_views:
    column = cols[col_idx]
    diagram = hf_llm_diagrams[diagram_idx]
    column.image(diagram, caption=caption, use_column_width="auto")
    print_model_list(diagram, column)

st.divider()
st.write("HuggingFace and Other Leaderboards: A Comparative Model Evaluation")
st.caption("Only models evaluated on both leaderboards are included.")

# Lay the remaining images out in two columns, alternating left and right.
# They come from a glob, which gives no ordering guarantee, so sort them to
# keep the layout the same from one run to the next.
cols = st.columns(2)
for i, img in enumerate(sorted(remaining_imgs)):
    # Captions are keyed by bare file name; files without an entry in
    # captions_map get an empty caption.
    filename = os.path.basename(img)
    caption = captions_map.get(filename, "")
    cols[i % 2].image(img, caption=caption, width=None)

st.write(
    """
Leaderboards tracked:
""", unsafe_allow_html=True
)
st.divider()

# Fourth "hf_llm_diagram" image, shown under the TruthfulQA heading.
st.write("TruthfulQA at 10% for HuggingFace Open LLM leaderboard by Model Size")
truthfulqa_diagram = hf_llm_diagrams[3]
st.image(truthfulqa_diagram, use_column_width="auto")
st.divider()

# Closing "About" section with maintainer and contact information.
about_text = (
    'This meta leaderboard is built and maintained by Felix Zaslavskiy. '
    'For feedback, correction, suggestions please reach out on X at '
    '@FZaslavskiy or here via community discussions.'
)
st.subheader('About')
st.write(about_text, unsafe_allow_html=True)