Tristan Thrush committed · 23ca923
Parent(s): 30f749f

removed requirement to be from autoeval org
app.py
CHANGED
@@ -45,30 +45,24 @@ def parse_metric_value(value):
         return value
 
 
-def parse_metrics_rows(meta, from_autoeval=False):
+def parse_metrics_rows(meta, only_verified=False):
     if not isinstance(meta["model-index"], list) or len(meta["model-index"]) == 0 or "results" not in meta["model-index"][0]:
         return None
     for result in meta["model-index"][0]["results"]:
         if not isinstance(result, dict) or "dataset" not in result or "metrics" not in result or "type" not in result["dataset"]:
             continue
         dataset = result["dataset"]["type"]
         row = {"dataset": dataset, "split": "-unspecified-", "config": "-unspecified-"}
         if "split" in result["dataset"]:
             row["split"] = result["dataset"]["split"]
         if "config" in result["dataset"]:
             row["config"] = result["dataset"]["config"]
         no_results = True
         for metric in result["metrics"]:
+            name = metric["type"].lower().strip()
 
-
-
-            if from_autoeval:
-                name = metric["name"].lower().strip()
-            else:
-                name = metric["type"].lower().strip()
-
-            if name in ("model_id", "dataset", "split", "config", "verified"):
-                # Metrics are not allowed to be named "dataset", "split", "config", or "verified".
+            if name in ("model_id", "dataset", "split", "config"):
+                # Metrics are not allowed to be named "dataset", "split", "config".
                 continue
             value = parse_metric_value(metric.get("value", None))
             if value is None:
@@ -78,10 +72,7 @@ def parse_metrics_rows(meta, from_autoeval=False):
             if name not in row or new_metric_better:
                 # overwrite the metric if the new value is better.
 
-                if from_autoeval:
-                    # if the metric is from autoeval, only include it in the leaderboard if
-                    # it is a verified metric. Unverified metrics are already included
-                    # in the leaderboard from the unverified model card.
+                if only_verified:
                     if "verified" in metric and metric["verified"]:
                         no_results = False
                         row[name] = value
@@ -97,52 +88,65 @@ def get_data_wrapper():
 
     def get_data():
         data = []
-
+        verified_data = []
+        model_ids = get_model_ids()[:100]
         model_ids_from_autoeval = set(get_model_ids(author="autoevaluate"))
         for model_id in tqdm(model_ids):
             meta = get_metadata(model_id)
             if meta is None:
                 continue
-            for row in parse_metrics_rows(meta, from_autoeval=model_id in model_ids_from_autoeval):
+            for row in parse_metrics_rows(meta):
                 if row is None:
                     continue
                 row["model_id"] = model_id
                 data.append(row)
+            for row in parse_metrics_rows(meta, only_verified=True):
+                if row is None:
+                    continue
+                row["model_id"] = model_id
+                verified_data.append(row)
         dataframe = pd.DataFrame.from_records(data)
         dataframe.to_pickle("cache.pkl")
+        verified_dataframe = pd.DataFrame.from_records(verified_data)
+        verified_dataframe.to_pickle("verified_cache.pkl")
 
-    if exists("cache.pkl"):
+    if exists("cache.pkl") and exists("verified_cache.pkl"):
         # If we have saved the results previously, call an asynchronous process
         # to fetch the results and update the saved file. Don't make users wait
         # while we fetch the new results. Instead, display the old results for
         # now. The new results should be loaded when this method
         # is called again.
         dataframe = pd.read_pickle("cache.pkl")
+        verified_dataframe = pd.read_pickle("verified_cache.pkl")
         t = threading.Thread(name='get_data procs', target=get_data)
         t.start()
     else:
         # We have to make the users wait during the first startup of this app.
         get_data()
         dataframe = pd.read_pickle("cache.pkl")
+        verified_dataframe = pd.read_pickle("verified_cache.pkl")
 
-    return dataframe
-
-dataframe = get_data_wrapper()
+    return dataframe, verified_dataframe
 
-
+dataframe, verified_dataframe = get_data_wrapper()
 
 st.markdown("# 🤗 Leaderboards")
 
+only_verified_results = st.sidebar.checkbox(
+    "Filter for Verified Results",
+)
+
+if only_verified_results:
+    dataframe = verified_dataframe
+
+selectable_datasets = list(set(dataframe.dataset.tolist()))
+
 query_params = st.experimental_get_query_params()
 default_dataset = "common_voice"
 if "dataset" in query_params:
     if len(query_params["dataset"]) > 0 and query_params["dataset"][0] in selectable_datasets:
         default_dataset = query_params["dataset"][0]
 
-only_verified_results = st.sidebar.checkbox(
-    "Filter for Verified Results",
-)
-
 dataset = st.sidebar.selectbox(
     "Dataset",
     selectable_datasets,
@@ -154,9 +158,6 @@ st.experimental_set_query_params(**{"dataset": [dataset]})
 dataset_df = dataframe[dataframe.dataset == dataset]
 dataset_df = dataset_df.dropna(axis="columns", how="all")
 
-if only_verified_results:
-    dataset_df = dataset_df[dataset_df["verified"]]
-
 selectable_configs = list(set(dataset_df["config"]))
 config = st.sidebar.selectbox(
     "Config",
@@ -171,7 +172,7 @@ split = st.sidebar.selectbox(
 )
 dataset_df = dataset_df[dataset_df.split == split]
 
-selectable_metrics = list(filter(lambda column: column not in ("model_id", "dataset", "split", "config", "verified"), dataset_df.columns))
+selectable_metrics = list(filter(lambda column: column not in ("model_id", "dataset", "split", "config"), dataset_df.columns))
 
 dataset_df = dataset_df.filter(["model_id"] + selectable_metrics)
 dataset_df = dataset_df.dropna(thresh=2) # Want at least two non-na values (one for model_id and one for a metric).
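The comments in get_data_wrapper describe a serve-stale-then-refresh cache: show the previously pickled results immediately and rebuild them in a background thread, so only the very first startup blocks. A minimal standalone sketch of that pattern, with an illustrative rebuild step (the placeholder data and single cache path below are assumptions, not taken from app.py):

import threading
from os.path import exists

import pandas as pd

CACHE_PATH = "cache.pkl"  # illustrative path; the app additionally keeps a verified cache


def rebuild_cache():
    # Stand-in for the slow step; the real app walks model metadata from the Hub.
    records = [{"model_id": "example/model", "dataset": "common_voice", "wer": 0.2}]
    pd.DataFrame.from_records(records).to_pickle(CACHE_PATH)


def get_data_cached():
    if exists(CACHE_PATH):
        # Serve the stale results right away and refresh them in the background;
        # the next call picks up the updated pickle.
        dataframe = pd.read_pickle(CACHE_PATH)
        threading.Thread(target=rebuild_cache, daemon=True).start()
    else:
        # First startup: nothing is cached yet, so the caller has to wait.
        rebuild_cache()
        dataframe = pd.read_pickle(CACHE_PATH)
    return dataframe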