import json
import os
import tarfile

import streamlit as st
from huggingface_hub import HfFileSystem

hf_fs = HfFileSystem(token=os.getenv("HF_TOKEN"))

st.set_page_config(layout="wide")
# Disable scroll bar
st.html("<style> .main {overflow: hidden} </style>")
DATASET_ID: str = "LLM360/k2-eval-gallery"
EVAL_DIR: str = os.path.join("datasets", DATASET_ID, "k2-eval-results")

st.title("K2 Evaluation Gallery")
st.markdown("""The K2 gallery lets you browse the outputs of various evaluations run on intermediate K2 checkpoints, giving an intuitive picture of how the model develops and improves over time.""")
def hf_listdir(parent_dir: str):
    """List the basenames of the entries directly under `parent_dir` on the Hub."""
    return (os.path.basename(file) for file in hf_fs.ls(
        parent_dir, detail=False
    ))
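# Used below as, e.g., hf_listdir(EVAL_DIR) for the available metrics and
# hf_listdir(os.path.join(EVAL_DIR, metric)) for that metric's n-shot settings.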
with st.sidebar:
    html = "<img src='https://www.llm360.ai/images/logo-highres.png' width='100' /><img src='https://huggingface.co/spaces/LLM360/k2-eval-gallery/raw/main/k2-logo.svg' width='100' />"
    st.markdown(html, unsafe_allow_html=True)
    metric = st.radio(
        "Choose a metric", options=hf_listdir(EVAL_DIR), help="type of evaluation benchmark task"
    )
    n_shot = st.radio(
        "Select an n-shot number", hf_listdir(os.path.join(EVAL_DIR, metric)),
        help="number of examples included in few-shot prompting"
    )

col1, col2 = st.columns(2)
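# Two side-by-side columns render the same set of widgets twice so that the outputs
# of two different checkpoints can be compared directly.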
def render_column(col_label):
    st.header(f"Checkpoint {col_label}")
    ckpt = st.select_slider('Select a checkpoint', sorted(hf_listdir(os.path.join(EVAL_DIR, metric, n_shot))), key=col_label + '1', help="checkpoint index from 3 to 360")
    st.write(f'Viewing Evaluation Results for Checkpoint: `{ckpt}`')
    suffix, result_file = ".tar.gz", "results.json"
    # Strip the archive suffix for display; if the aggregate results file is present,
    # move it to the end of the list.
    file_list: list = sorted(f_name[:-len(suffix)] for f_name in hf_listdir(os.path.join(EVAL_DIR, metric, n_shot, ckpt)))
    if result_file in file_list:
        file_list.remove(result_file)
        file_list = file_list + [result_file]
    file = st.selectbox("Select a file", file_list, key=col_label + '2', help="a list of raw output files from evaluation results")
    file += suffix
    # Stream the selected .tar.gz from the Hub and load the JSON payload of its first member.
    with tarfile.open(fileobj=hf_fs.open(
        os.path.join(EVAL_DIR, metric, n_shot, ckpt, file), "rb"
    ), mode="r:gz") as tar:
        f = tar.extractfile(tar.next())
        eval_json = json.load(f)
        if isinstance(eval_json, list):
            doc_id = st.slider("Select a document id", 0, len(eval_json) - 1, 0, 1, key=col_label + '3', help="index of a specific question/task in current file")
            st.json(eval_json[doc_id])
        else:
            st.json(eval_json)
        f.close()
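# render_column is invoked once per column; the widget keys suffixed with col_label
# (e.g. 'A1', 'B1') keep the two columns' selections independent of each other.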
with col1:
    render_column('A')
with col2:
    render_column('B')
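# To try this locally (assuming the script lives in app.py, as is typical for a Space):
#   HF_TOKEN=<your token> streamlit run app.py
# The token is optional for public datasets; HfFileSystem accepts token=None.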