Update app.py
Browse files
app.py
CHANGED
@@ -1,115 +1,89 @@
|
|
1 |
-
|
|
|
|
|
|
|
2 |
import gradio as gr
|
3 |
import pandas as pd
|
4 |
-
import numpy as np
|
5 |
-
|
6 |
-
from collections import Counter
|
7 |
-
|
8 |
-
from huggingface_hub import HfApi
|
9 |
-
from datasets import load_dataset
|
10 |
|
11 |
api = HfApi()
|
12 |
-
list_models = api.list_models()
|
13 |
-
|
14 |
-
def fetch_dataset_and_init():
    """Load the model-card dataset and tabulate how often each library appears.

    Returns:
        A ``(df, df_log)`` pair of DataFrames with the columns
        ``library_name`` and ``count``, sorted by descending count.
        ``df_log`` is a copy of ``df`` whose ``count`` column holds the
        natural logarithm of the raw counts.
    """
    cards = load_dataset("librarian-bots/model_cards_with_metadata", split="train")
    occurrences = pd.Series(Counter(cards["library_name"]))

    # Most frequent libraries first.
    ranked = occurrences.sort_values(ascending=False).to_frame()
    ranked.columns = ["count"]
    ranked = ranked.reset_index().rename(columns={"index": "library_name"})

    # Cards without a library get an explicit, human-readable bucket.
    ranked.replace(to_replace=[None], value="No library_name", inplace=True)

    ranked_log = ranked.copy()
    ranked_log["count"] = np.log(ranked_log["count"])

    return ranked, ranked_log
|
32 |
-
|
33 |
-
df, df_log = fetch_dataset_and_init()
|
34 |
-
|
35 |
-
def get_current_nb_models():
    """Return how many Hub models are not accounted for in ``df``.

    The result is the number of models currently listed by the Hub API
    minus the sum of the ``count`` column of the module-level ``df``.

    Returns:
        The difference, formatted as a string for display in a textbox.
    """
    # `list_models` returns a lazy iterable, so it has to be counted by
    # iterating. Ask for a fresh one on every call: a single module-level
    # generator is exhausted after the first click and every later click
    # would count zero models.
    total_models = sum(1 for _ in api.list_models())
    diff_models = total_models - df["count"].sum()
    return str(diff_models)
|
40 |
-
|
41 |
-
plot_height = 512
|
42 |
-
plot_width = 1512
|
43 |
-
select_box = ["all"]
|
44 |
-
top_k = len(df)
|
45 |
-
|
46 |
-
def bar_plot_fn(display, top_k, select_box=None):
    """Build the bar plot of model counts per library.

    Args:
        display: ``"simple"`` to plot the raw counts (``df``) or ``"log"``
            to plot the log-scaled counts (``df_log``).
        top_k: Number of leading rows to show when no specific library is
            selected.
        select_box: Library names to restrict the plot to. ``None`` or
            ``["all"]`` means "no filter". It defaults to ``None`` so the
            function can also be wired to an event that only supplies
            ``display`` and ``top_k``.

    Returns:
        A ``gr.BarPlot`` for a known ``display`` value, otherwise ``None``.
    """
    if display == "simple":
        source_df = df
    elif display == "log":
        source_df = df_log
    else:
        # Unknown display mode: nothing to draw.
        return None

    # Any selection other than exactly ["all"] is treated as a filter on
    # the library names; otherwise show the first `top_k` rows.
    if select_box is not None and select_box != ["all"]:
        current_df = source_df[source_df["library_name"].isin(select_box)]
    else:
        current_df = source_df[:top_k]

    return gr.BarPlot(
        current_df,
        x="library_name",
        y="count",
        tooltip=["library_name", "count"],
        height=plot_height,
        width=plot_width,
    )
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
with gr.Blocks() as bar_plot:
|
78 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
with gr.Column():
|
80 |
display = gr.Dropdown(
|
81 |
-
choices=
|
82 |
-
|
83 |
-
"log",
|
84 |
-
],
|
85 |
-
value="simple",
|
86 |
label="Type of Bar Plot",
|
|
|
87 |
)
|
88 |
-
|
89 |
-
|
90 |
-
value=len(df),
|
91 |
-
minimum=1,
|
92 |
-
maximum=len(df),
|
93 |
-
step=1,
|
94 |
-
)
|
95 |
-
with gr.Column():
|
96 |
-
plot = gr.BarPlot()
|
97 |
-
|
98 |
-
with gr.Row():
|
99 |
-
fetch_button = gr.Button(value="Fetch current number of models without model cards (takes up to 1min to fetch everything)")
|
100 |
-
text_box = gr.Textbox(value="", label="Number of models without model cards")
|
101 |
-
|
102 |
-
with gr.Column():
|
103 |
-
select_box = gr.Dropdown(
|
104 |
-
["all"] + df["library_name"].tolist(), value=["all"], multiselect=True, label="Libraries to inspect", info="Select specific libraries to inspect"
|
105 |
-
)
|
106 |
-
|
107 |
-
|
108 |
-
top_k.change(bar_plot_fn, inputs=[display, top_k, select_box], outputs=plot)
|
109 |
-
display.change(bar_plot_fn, inputs=[display, top_k, select_box], outputs=plot)
|
110 |
-
select_box.change(bar_plot_fn, inputs=[display, top_k, select_box], outputs=plot)
|
111 |
-
|
112 |
-
fetch_button.click(get_current_nb_models, outputs=[text_box])
|
113 |
-
bar_plot.load(fn=bar_plot_fn, inputs=[display, top_k], outputs=plot)
|
114 |
|
115 |
bar_plot.launch()
|
|
|
1 |
+
from datasets import load_dataset
|
2 |
+
from huggingface_hub import ModelCard
|
3 |
+
from huggingface_hub import HfApi
|
4 |
+
|
5 |
import gradio as gr
|
6 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
api = HfApi()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
+
repo_id = "librarian-bots/model_cards_with_metadata"

# The dataset itself is loaded inside `get_data`, which fetches and filters
# the requested revision on its own; loading it here as well would only
# repeat that work at start-up.

# Map each commit date (MM/DD/YYYY) of the dataset repo to a commit id so a
# snapshot of the dataset can be selected by date.
# NOTE(review): commits made on the same day share one key, so only the
# last one iterated is kept for that date.
list_commits = api.list_repo_commits(repo_id, repo_type="dataset")
commits_date_dict = {commit.created_at.strftime("%m/%d/%Y"): commit.commit_id for commit in list_commits}
current_date = "latest"
|
18 |
+
|
19 |
+
def get_data(commit_date="latest"):
    """Count transformers models and those lacking ``library_name`` in their card.

    Args:
        commit_date: ``"latest"`` to use the default revision of the
            dataset, or a key of ``commits_date_dict`` to load the dataset
            at the commit recorded for that date.

    Returns:
        A two-row DataFrame with the columns ``name`` and ``count``: the
        number of rows whose ``library_name`` is ``transformers`` and, among
        them, the number whose model card metadata has no ``library_name``.

    Raises:
        KeyError: If ``commit_date`` is neither ``"latest"`` nor a key of
            ``commits_date_dict``.
    """
    # Pin the dataset to the commit recorded for the requested date; with
    # "latest" no revision is passed and the default one is loaded.
    ds_kwargs = {}
    if commit_date != "latest":
        ds_kwargs["revision"] = commits_date_dict[commit_date]

    dataset = load_dataset(repo_id, split='train', **ds_kwargs)
    dataset = dataset.filter(lambda x: x['library_name'] == 'transformers')

    def library_name_not_in_card(card):
        """Return True when the parsed card metadata has no ``library_name``."""
        try:
            return ModelCard(card).data.library_name is None
        except Exception:
            # Best effort: a card that cannot be parsed is not reported as
            # missing the tag.
            return False

    ds = dataset.map(lambda x: {"missing_library_name": library_name_not_in_card(x['card'])}, num_proc=4)

    data = pd.DataFrame(
        {
            "name": ["Total Number of transformers Model", "Total number of models with missing 'library_name: transformers' in model card."],
            "count": [len(ds), sum(ds["missing_library_name"])],
        }
    )
    return data
|
50 |
+
|
51 |
+
def fetch_fn(commit_date="latest"):
    """Rebuild the bar plot for the dataset snapshot of ``commit_date``.

    Args:
        commit_date: ``"latest"`` or a date key accepted by ``get_data``.
            An empty value (for example a cleared dropdown) is treated as
            ``"latest"``.

    Returns:
        A ``gr.BarPlot`` showing the counts returned by ``get_data``.
    """
    # A cleared dropdown delivers no value; fall back to the default
    # snapshot instead of failing on the date lookup.
    if not commit_date:
        commit_date = "latest"

    data = get_data(commit_date=commit_date)
    return gr.BarPlot(
        data,
        x="name",
        y="count",
        # Keep the title in line with the initial plot, which names the date.
        title=f"Count of Model cards with the correct library_name tag at the date {commit_date}",
        height=256,
        width=1024,
        tooltip=["name", "count"],
        vertical=False,
    )
|
63 |
+
|
64 |
+
data = get_data()
|
65 |
|
66 |
# UI: one horizontal bar plot plus a dropdown that selects which dated
# snapshot of the dataset the plot is computed from.
with gr.Blocks() as bar_plot:
    with gr.Column():
        with gr.Row():
            plot = gr.BarPlot(
                data,
                x="name",
                y="count",
                title=f"Count of Model cards with the correct library_name tag at the date {current_date}",
                height=256,
                width=1024,
                tooltip=["name", "count"],
                vertical=False,
            )
        with gr.Column():
            display = gr.Dropdown(
                # "latest" is the initial value, so it must also be offered
                # as a choice; otherwise it cannot be selected again once a
                # date has been picked.
                choices=["latest"] + list(commits_date_dict.keys()),
                value="latest",
                # The dropdown lists commit dates, not plot types.
                label="Dataset snapshot date",
            )

    # Recompute the plot whenever another snapshot is selected.
    display.change(fetch_fn, inputs=display, outputs=plot)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
bar_plot.launch()
|