# Source: Datasets-Metrics-Viewer / src/view/metric_view_tab.py
# Hosting-page metadata: author "hynky" (HF staff); commit message "nits"; revision 638184c.
from datetime import datetime
import tempfile
from typing import Callable
import gradio as gr
from functools import partial
import re
import json
from src.logic.data_fetching import fetch_datasets, fetch_graph_data, fetch_groups, fetch_metrics, update_datasets_with_regex, update_datasets_with_regex
from src.logic.data_processing import export_data
from src.logic.graph_settings import update_graph_options
from src.logic.plotting import plot_data
def create_metric_view_tab(METRICS_LOCATION_DEFAULT: str, available_datasets: gr.State):
    """Build the metric-view tab: input controls, settings tabs, plot and event wiring.

    The function only instantiates Gradio components and registers listeners;
    it is presumably meant to be called inside an enclosing Gradio layout
    context owned by the caller (TODO confirm against the call site).

    NOTE(review): the ``with`` nesting below was reconstructed from a copy of
    this file whose indentation had been lost — confirm the layout against the
    rendered UI.

    Args:
        METRICS_LOCATION_DEFAULT: Initial value of the "Metrics Location" textbox.
        available_datasets: State component supplied by the caller. It is
            written by the "Fetch Datasets" handler and read when refreshing
            the datasets dropdown and when applying the regex filter.

    Returns:
        A ``(base_folder, selected_datasets_dropdown)`` tuple: the metrics
        location textbox and the multi-select datasets dropdown.
    """
    # Holds the data returned by fetch_graph_data; every change triggers a re-plot.
    metric_data = gr.State([])

    # ------------------------------------------------------------------
    # Layout: dataset / grouping / metric selection
    # ------------------------------------------------------------------
    with gr.Row():
        with gr.Column(scale=2):
            with gr.Row():
                with gr.Column(scale=1):
                    base_folder = gr.Textbox(
                        label="Metrics Location",
                        value=METRICS_LOCATION_DEFAULT,
                    )
                    datasets_fetch = gr.Button("Fetch Datasets")

                with gr.Column(scale=1):
                    regex_select = gr.Text(label="Regex filter", value=".*")
                    regex_button = gr.Button("Search")

            with gr.Row():
                selected_datasets_dropdown = gr.Dropdown(
                    choices=[],
                    label="Datasets",
                    multiselect=True,
                    interactive=True,
                )

        with gr.Column(scale=1):
            grouping_dropdown = gr.Dropdown(
                choices=[],
                label="Grouping",
                multiselect=False,
            )
            metric_name_dropdown = gr.Dropdown(
                choices=[],
                label="Metric name",
                multiselect=False,
            )

    render_button = gr.Button("Render Metric", variant="primary")

    # ------------------------------------------------------------------
    # Layout: plot settings, one tab per settings family
    # ------------------------------------------------------------------
    with gr.Tabs():
        with gr.TabItem("Graph Settings"):
            log_scale_x_checkbox = gr.Checkbox(
                label="Log scale x",
                value=False,
            )
            log_scale_y_checkbox = gr.Checkbox(
                label="Log scale y",
                value=False,
            )
            rounding = gr.Number(
                label="Rounding",
                value=2,
            )

        with gr.TabItem("Grouping Settings") as group_settings:
            with gr.Row():
                with gr.Column(scale=2):
                    group_regex = gr.Text(
                        label="Group Regex",
                        value=None,
                    )
                    with gr.Row():
                        top_select = gr.Number(
                            label="N Groups",
                            value=100,
                            interactive=True,
                        )
                        direction_checkbox = gr.Radio(
                            label="Partition",
                            choices=[
                                "Top",
                                "Bottom",
                                "Most frequent (n_docs)",
                            ],
                            value="Most frequent (n_docs)",
                        )

        with gr.TabItem("Histogram Settings") as histogram_settings:
            # Created hidden, but its value is still passed to plot_data.
            normalization_checkbox = gr.Checkbox(
                label="Normalize",
                value=True,
                visible=False,
            )
            cdf_checkbox = gr.Checkbox(
                label="CDF",
                value=False,
            )
            perc_checkbox = gr.Checkbox(
                label="%",
                value=False,
            )

        with gr.TabItem("Summary Settings") as summary_settings:
            show_stds_checkbox = gr.Checkbox(
                label="Show standard deviations",
                value=False,
            )

    # ------------------------------------------------------------------
    # Layout: outputs
    # ------------------------------------------------------------------
    with gr.Row():
        graph_output = gr.Plot(label="Graph")

    # Starts hidden; plot_data lists this row among its outputs.
    with gr.Row(visible=False) as min_max_hist:
        with gr.Column(scale=3):
            min_max_hist_data = gr.Markdown()
        with gr.Column(scale=1):
            export_data_button = gr.Button("Export Data")
            export_data_json = gr.File(visible=False)

    # ------------------------------------------------------------------
    # Event wiring
    # ------------------------------------------------------------------
    def update_selected_datasets_dropdown(datasets, selected):
        """Return a dropdown offering ``datasets``, keeping only still-valid selections.

        ``selected`` may be ``None`` or empty when nothing is selected. The
        surviving selection is sorted so the displayed order is deterministic.
        """
        still_selected = set(selected or []) & set(datasets)
        return gr.Dropdown(choices=datasets, value=sorted(still_selected))

    datasets_fetch.click(
        fn=fetch_datasets,
        inputs=[base_folder],
        outputs=[available_datasets, selected_datasets_dropdown],
    )

    # Whenever the list of available datasets changes, refresh the dropdown
    # choices and drop selections that are no longer available.
    available_datasets.change(
        fn=update_selected_datasets_dropdown,
        inputs=[available_datasets, selected_datasets_dropdown],
        outputs=selected_datasets_dropdown,
    )

    regex_button.click(
        fn=update_datasets_with_regex,
        inputs=[regex_select, selected_datasets_dropdown, available_datasets],
        outputs=selected_datasets_dropdown,
    )

    # Cascade: datasets -> grouping -> metric name.
    selected_datasets_dropdown.change(
        fn=fetch_groups,
        inputs=[base_folder, selected_datasets_dropdown, grouping_dropdown],
        outputs=grouping_dropdown,
    )
    grouping_dropdown.change(
        fn=fetch_metrics,
        inputs=[base_folder, selected_datasets_dropdown, grouping_dropdown, metric_name_dropdown],
        outputs=metric_name_dropdown,
    )

    render_button.click(
        fn=fetch_graph_data,
        inputs=[
            base_folder,
            selected_datasets_dropdown,
            metric_name_dropdown,
            grouping_dropdown,
        ],
        # graph_output is listed as an output as well (set to None) so that the
        # progress indicator is shown on the plot while data is being fetched.
        outputs=[metric_data, graph_output],
    )

    # The chosen grouping also updates the three grouping-dependent settings tabs.
    grouping_dropdown.change(
        fn=update_graph_options,
        inputs=[grouping_dropdown],
        outputs=[group_settings, histogram_settings, summary_settings],
    )

    # Re-plot on any user edit of a plot setting (``.input`` events) and
    # whenever freshly fetched data lands in ``metric_data``.
    replot_triggers = [
        normalization_checkbox.input,
        rounding.input,
        group_regex.input,
        direction_checkbox.input,
        top_select.input,
        log_scale_x_checkbox.input,
        log_scale_y_checkbox.input,
        cdf_checkbox.input,
        perc_checkbox.input,
        show_stds_checkbox.input,
        metric_data.change,
    ]
    # Order matters: values are passed to plot_data positionally.
    plot_inputs = [
        metric_data,
        metric_name_dropdown,
        normalization_checkbox,
        rounding,
        grouping_dropdown,
        top_select,
        direction_checkbox,
        group_regex,
        log_scale_x_checkbox,
        log_scale_y_checkbox,
        cdf_checkbox,
        perc_checkbox,
        show_stds_checkbox,
    ]
    gr.on(
        triggers=replot_triggers,
        fn=plot_data,
        inputs=plot_inputs,
        outputs=[graph_output, min_max_hist, min_max_hist_data],
    )

    export_data_button.click(
        fn=export_data,
        inputs=[metric_data, metric_name_dropdown, grouping_dropdown],
        outputs=[export_data_json],
    )

    return base_folder, selected_datasets_dropdown